diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000..84c84d3128
Binary files /dev/null and b/.gitignore differ
diff --git a/Algo256/cuda_blake256.cu b/Algo256/cuda_blake256.cu
new file mode 100644
index 0000000000..f0af0fcdc9
--- /dev/null
+++ b/Algo256/cuda_blake256.cu
@@ -0,0 +1,251 @@
+/**
+ * Blake-256 Cuda Kernel (Tested on SM 5.0)
+ *
+ * Tanguy Pruvot - Nov. 2014
+ */
+
+#include <cuda.h>
+#include "cuda_runtime.h"
+#include "device_launch_parameters.h"
+
+
+#include <stdio.h>
+#include <stdint.h>
+#include <memory.h>
+
+extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
+
+extern int compute_version[8];
+#include "cuda_helper.h"
+
+__constant__ static uint32_t  c_data[20]; // 80-byte block header as 20 words; uploaded by blake256_cpu_setBlock_80
+// Word schedule indexed as sigma[round][position]; device copy filled from c_sigma by blake256_cpu_init.
+__constant__ static uint32_t sigma[16][16];
+static uint32_t  c_sigma[16][16] = { // host master copy; rows 10..15 repeat rows 0..5
+		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
+		{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
+		{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
+		{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
+		{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
+		{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 },
+		{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 },
+		{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 },
+		{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 },
+		{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 },
+		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
+		{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
+		{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
+		{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
+		{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
+		{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 }
+};
+
+// Initial chaining value h[0..7] that blake256_cpu_setBlock_80 starts from.
+static const uint32_t  c_IV256[8] = {
+	0x6A09E667, 0xBB67AE85,
+	0x3C6EF372, 0xA54FF53A,
+	0x510E527F, 0x9B05688C,
+	0x1F83D9AB, 0x5BE0CD19
+};
+// Chaining value after the first 64-byte block; computed on the host and uploaded by blake256_cpu_setBlock_80.
+__device__ __constant__ static uint32_t cpu_h[8];
+// Constants XORed with message words in GS2; device copy filled from c_u256 by blake256_cpu_init.
+__device__ __constant__ static  uint32_t  u256[16];
+static const uint32_t  c_u256[16] = { // host master copy, also read directly by hostGS and blake256_compress1st
+	0x243F6A88, 0x85A308D3,
+	0x13198A2E, 0x03707344,
+	0xA4093822, 0x299F31D0,
+	0x082EFA98, 0xEC4E6C89,
+	0x452821E6, 0x38D01377,
+	0xBE5466CF, 0x34E90C6C,
+	0xC0AC29B7, 0xC97C50DD,
+	0x3F84D5B5, 0xB5470917
+};
+
+#define GS2(a,b,c,d,x) { /* device-side mixing step on v[a],v[b],v[c],v[d]; expects v, m and the round index r in the caller's scope */ \
+	const uint32_t idx1 = sigma[r][x]; \
+	const uint32_t idx2 = sigma[r][x+1]; \
+	v[a] += (m[idx1] ^ u256[idx2]) + v[b]; /* first half: message word idx1, constant idx2 */ \
+	v[d] = SPH_ROTL32(v[d] ^ v[a], 16); /* presumably a 32-bit rotate-left from cuda_helper.h; by 16 it equals a rotate-right */ \
+	v[c] += v[d]; \
+	v[b] = SPH_ROTR32(v[b] ^ v[c], 12); \
+\
+	v[a] += (m[idx2] ^ u256[idx1]) + v[b]; /* second half: the two indices swap roles */ \
+	v[d] = SPH_ROTR32(v[d] ^ v[a], 8); \
+	v[c] += v[d]; \
+	v[b] = SPH_ROTR32(v[b] ^ v[c], 7); \
+}
+// Host-side rotate-right for hostGS; a left-rotate helper is kept disabled: #define ROTL32(x, n) ((x) << (n)) | ((x) >> (32 - (n)))
+#define ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n)))) // only meaningful for 0 < n < 32 on a 32-bit x
+#define hostGS(a,b,c,d,x) { /* host twin of GS2: same step, but reads the host tables c_sigma and c_u256 */ \
+	const uint32_t idx1 = c_sigma[r][x]; \
+	const uint32_t idx2 = c_sigma[r][x+1]; \
+	v[a] += (m[idx1] ^ c_u256[idx2]) + v[b]; \
+	v[d] = ROTR32(v[d] ^ v[a], 16); \
+	v[c] += v[d]; \
+	v[b] = ROTR32(v[b] ^ v[c], 12); \
+\
+	v[a] += (m[idx2] ^ c_u256[idx1]) + v[b]; /* second half: the two indices swap roles */ \
+	v[d] = ROTR32(v[d] ^ v[a], 8); \
+	v[c] += v[d]; \
+	v[b] = ROTR32(v[b] ^ v[c], 7); \
+	}
+
+/* Words 4..15 of the second message block (header bytes 64-80) never change, so they are stored here; entries 0..3 are placeholders. */
+__device__ __constant__ static const uint32_t  c_Padding[16] = {
+	0, 0, 0, 0, // never read: blake256_compress2nd takes words 0..3 from its block argument
+	0x80000000, 0, 0, 0, // first padding word, directly after the 80 header bytes
+	0, 0, 0, 0,
+	0, 1, 0, 640, // last word: message length in bits (80 bytes * 8)
+};
+
+__host__ __forceinline__ static void blake256_compress1st(uint32_t *h, const uint32_t *block, const uint32_t T0)
+{
+	/*
+	 * Host-side compression of one 16-word block.
+	 *   h     : 8-word chaining value, updated in place
+	 *   block : 16 message words
+	 *   T0    : counter value XORed into v[12] and v[13]
+	 * The hostGS macro reads `m`, `v` and `r` by name, so those names are fixed.
+	 */
+	uint32_t m[16];
+	uint32_t v[16];
+
+	memcpy(m, block, sizeof(m));
+
+	/* working state: chaining value in the low half, constants in the high half */
+	for (int k = 0; k < 16; ++k) {
+		if (k < 8)
+			v[k] = h[k];
+		else
+			v[k] = c_u256[k - 8];
+	}
+	v[12] ^= T0;
+	v[13] ^= T0;
+
+	for (int r = 0; r < 14; ++r) {
+		/* columns */
+		hostGS(0, 4, 0x8, 0xC, 0x0);
+		hostGS(1, 5, 0x9, 0xD, 0x2);
+		hostGS(2, 6, 0xA, 0xE, 0x4);
+		hostGS(3, 7, 0xB, 0xF, 0x6);
+		/* diagonals */
+		hostGS(0, 5, 0xA, 0xF, 0x8);
+		hostGS(1, 6, 0xB, 0xC, 0xA);
+		hostGS(2, 7, 0x8, 0xD, 0xC);
+		hostGS(3, 4, 0x9, 0xE, 0xE);
+	}
+
+	/* fold both halves of the working state back into h */
+	for (int k = 0; k < 8; ++k) {
+		h[k] ^= v[k];
+		h[k] ^= v[k + 8];
+	}
+}
+
+void blake256_cpu_init(int thr_id, int threads)
+{	/* Uploads the host tables c_u256 and c_sigma into constant memory (u256, sigma); `threads` is unused. */
+	const cudaError_t errU = cudaMemcpyToSymbol(u256, c_u256, sizeof(c_u256), 0, cudaMemcpyHostToDevice);
+	const cudaError_t errS = cudaMemcpyToSymbol(sigma, c_sigma, sizeof(c_sigma), 0, cudaMemcpyHostToDevice);
+	if (errU != cudaSuccess || errS != cudaSuccess) fprintf(stderr, "GPU #%d: blake256_cpu_init failed: %s\n", thr_id, cudaGetErrorString(errU != cudaSuccess ? errU : errS)); /* a failed upload would otherwise silently yield wrong hashes */
+}
+
+__device__ __forceinline__ static void blake256_compress2nd(uint32_t *h, const uint32_t *block, const uint32_t T0)
+{
+	uint32_t m[16];
+	uint32_t v[16];
+	// Device-side compression of the last block: only the first four message words come from the caller.
+	m[0] = block[0];
+	m[1] = block[1];
+	m[2] = block[2];
+	m[3] = block[3];
+	// Words 4..15 are the fixed values stored in c_Padding.
+#pragma unroll 
+	for (int i = 4; i < 16; i++) {
+		m[i] = c_Padding[i];
+	}
+	// Working state: v[0..7] = chaining value h, v[8..15] = constants u256[0..7].
+#pragma unroll 8
+	for (int i = 0; i < 8; i++)
+		v[i] = h[i];
+
+	v[8] =  u256[0];
+	v[9] =  u256[1];
+	v[10] = u256[2];
+	v[11] = u256[3];
+	// The counter T0 is XORed into v[12] and v[13] only.
+	v[12] = u256[4] ^ T0;
+	v[13] = u256[5] ^ T0;
+	v[14] = u256[6];
+	v[15] = u256[7];
+	// 14 rounds; GS2 picks up m, v and r by name.
+#pragma unroll 14
+	for (int r = 0; r < 14; r++) {
+		/* column step */
+		GS2(0, 4, 0x8, 0xC, 0x0);
+		GS2(1, 5, 0x9, 0xD, 0x2);
+		GS2(2, 6, 0xA, 0xE, 0x4);
+		GS2(3, 7, 0xB, 0xF, 0x6);
+		/* diagonal step */
+		GS2(0, 5, 0xA, 0xF, 0x8);
+		GS2(1, 6, 0xB, 0xC, 0xA);
+		GS2(2, 7, 0x8, 0xD, 0xC);
+		GS2(3, 4, 0x9, 0xE, 0xE);
+	}
+#pragma unroll 16
+	for (int i = 0; i < 16; i++) {
+		 int j = i & 7; // v[i] and v[i+8] fold into the same word of h
+		h[j] ^= v[i];
+	}
+}
+
+
+__global__ __launch_bounds__(256,3) void blake256_gpu_hash_80(const uint32_t threads, const uint32_t startNonce, uint64_t * Hash)
+{	// One thread per nonce: finishes the hash of the 80-byte header from the precomputed state cpu_h.
+	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	if (thread < threads) // the grid is rounded up, so trailing threads have nothing to do
+	{
+		const uint32_t nonce = startNonce + thread;
+		uint32_t h[8];
+		// input = header words 16..18 followed by this thread's nonce (the variable part of the second block)
+		uint32_t input[4];
+        #pragma unroll 8
+		for (int i = 0; i<8; i++) { h[i] = cpu_h[i];}
+        #pragma unroll 3
+		for (int i = 0; i < 3; ++i) input[i] = c_data[16 + i];
+		input[3] = nonce;
+		blake256_compress2nd(h, input, 640);
+
+		// Output is word-major: 64-bit word i of this thread goes to Hash[i*threads + thread].
+        #pragma unroll 
+for (int i = 0; i<4; i++) { Hash[i*threads + thread] = cuda_swab32ll(MAKE_ULONGLONG(h[2 * i], h[2*i+1])); } // NOTE(review): cuda_swab32ll presumably byte-swaps each 32-bit half - confirm in cuda_helper.h
+
+	}
+}
+
+__host__ void blake256_cpu_hash_80(const int thr_id, const uint32_t threads, const uint32_t startNonce, uint64_t *Hash, int order)
+{
+	// Launches blake256_gpu_hash_80 with one thread per nonce, 256 threads per block,
+	// then waits through MyStreamSynchronize.
+	const int tpb = 256;
+	const size_t smemBytes = 0; // no dynamic shared memory is requested
+
+	// enough blocks to cover `threads`, rounded up
+	const dim3 launchBlock(tpb);
+	const dim3 launchGrid((threads + tpb - 1) / tpb);
+
+	blake256_gpu_hash_80<<<launchGrid, launchBlock, smemBytes>>>(threads, startNonce, Hash);
+	MyStreamSynchronize(NULL, order, thr_id);
+}
+
+__host__ void blake256_cpu_setBlock_80(uint32_t *pdata)
+{
+	uint32_t header[20];  // private copy of the 80-byte block header
+	uint32_t midstate[8]; // chaining value after the first 64-byte block
+	memcpy(header, pdata, 80);
+	memcpy(midstate, c_IV256, sizeof(midstate));
+	blake256_compress1st(midstate, pdata, 512);
+	cudaMemcpyToSymbol(cpu_h, midstate, sizeof(midstate), 0, cudaMemcpyHostToDevice);
+	cudaMemcpyToSymbol(c_data, header, sizeof(header), 0, cudaMemcpyHostToDevice);
+}
+
diff --git a/Algo256/cuda_groestl256.cu b/Algo256/cuda_groestl256.cu
new file mode 100644
index 0000000000..1d776a23e1
--- /dev/null
+++ b/Algo256/cuda_groestl256.cu
@@ -0,0 +1,372 @@
+
+#include <cuda.h>
+#include "cuda_runtime.h"
+#include "device_launch_parameters.h"
+
+
+#include <stdio.h>
+#include <stdint.h>
+#include <memory.h>
+
+
+
+extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
+extern int compute_version[8];
+uint32_t *d_gnounce[8]; // per-thr_id host word (cudaMallocHost in groestl256_cpu_init) the found nonce is copied into
+uint32_t *d_GNonce[8]; // per-thr_id device word (cudaMalloc in groestl256_cpu_init) the kernel writes a found nonce to
+__constant__ uint32_t pTarget[8]; // eight target words set by groestl256_setTarget; the kernel compares against 64-bit word 3
+
+#include "cuda_helper.h"
+
+
+////////////////////////////////////////////////////////////////////////
+
+#define SPH_C32(x)    ((uint32_t)(x ## U)) // unsigned 32-bit literal
+#define SPH_T32(x)    ((x) & SPH_C32(0xFFFFFFFF)) // keep the low 32 bits
+// C32e(x): the 32-bit constant x with its four bytes in reverse order.
+#define C32e(x)     ((SPH_C32(x) >> 24) \
+                    | ((SPH_C32(x) >>  8) & SPH_C32(0x0000FF00)) \
+                    | ((SPH_C32(x) <<  8) & SPH_C32(0x00FF0000)) \
+                    | ((SPH_C32(x) << 24) & SPH_C32(0xFF000000)))
+// Round-constant words for column index j and round r; the permutations below apply "up" values to even words and "dn" values to odd words.
+#define PC32up(j, r)   ((uint32_t)((j) + (r)))
+#define PC32dn(j, r)   0
+#define QC32up(j, r)   0xFFFFFFFF
+#define QC32dn(j, r)   (((uint32_t)(r) << 24) ^ SPH_T32(~((uint32_t)(j) << 24)))
+// B32_n(x): byte n of x, zero-extended; selector nibble 4 takes the remaining bytes from the second operand, which is 0.
+#define B32_0(x)    __byte_perm(x, 0, 0x4440)
+// equivalent to ((x) & 0xFF)
+#define B32_1(x)    __byte_perm(x, 0, 0x4441)
+// equivalent to (((x) >> 8) & 0xFF)
+#define B32_2(x)    __byte_perm(x, 0, 0x4442)
+// equivalent to (((x) >> 16) & 0xFF)
+#define B32_3(x)    __byte_perm(x, 0, 0x4443)
+// equivalent to ((x) >> 24)
+#define MAXWELL_OR_FERMI 1 // compile-time switch selecting which T-table accessors are defined below
+#if MAXWELL_OR_FERMI
+#define USE_SHARED 1
+// Maxwell and Fermi cards seem to get the best speed when every table is read from shared memory.
+#if USE_SHARED
+#define T0up(x) (*((uint32_t*)mixtabs + (    (x)))) // eight 256-entry tables packed back to back in `mixtabs`
+#define T0dn(x) (*((uint32_t*)mixtabs + (256+(x))))
+#define T1up(x) (*((uint32_t*)mixtabs + (512+(x))))
+#define T1dn(x) (*((uint32_t*)mixtabs + (768+(x))))
+#define T2up(x) (*((uint32_t*)mixtabs + (1024+(x))))
+#define T2dn(x) (*((uint32_t*)mixtabs + (1280+(x))))
+#define T3up(x) (*((uint32_t*)mixtabs + (1536+(x))))
+#define T3dn(x) (*((uint32_t*)mixtabs + (1792+(x))))
+#else
+#define T0up(x) tex1Dfetch(t0up2, x) // texture-only variant, used when USE_SHARED is 0
+#define T0dn(x) tex1Dfetch(t0dn2, x)
+#define T1up(x) tex1Dfetch(t1up2, x)
+#define T1dn(x) tex1Dfetch(t1dn2, x)
+#define T2up(x) tex1Dfetch(t2up2, x)
+#define T2dn(x) tex1Dfetch(t2dn2, x)
+#define T3up(x) tex1Dfetch(t3up2, x)
+#define T3dn(x) tex1Dfetch(t3dn2, x)
+#endif
+#else
+#define USE_SHARED 1
+// A mix of shared-memory and texture reads gives the highest speed on compute 3.0 and 3.5.
+#define T0up(x) (*((uint32_t*)mixtabs + (    (x))))
+#define T0dn(x) tex1Dfetch(t0dn2, x)
+#define T1up(x) tex1Dfetch(t1up2, x)
+#define T1dn(x) (*((uint32_t*)mixtabs + (768+(x))))
+#define T2up(x) tex1Dfetch(t2up2, x)
+#define T2dn(x) (*((uint32_t*)mixtabs + (1280+(x))))
+#define T3up(x) (*((uint32_t*)mixtabs + (1536+(x))))
+#define T3dn(x) tex1Dfetch(t3dn2, x)
+#endif
+
+texture<unsigned int, 1, cudaReadModeElementType> t0up2; // one 1D texture per lookup table; bound in groestl256_cpu_init via texDef
+texture<unsigned int, 1, cudaReadModeElementType> t0dn2;
+texture<unsigned int, 1, cudaReadModeElementType> t1up2;
+texture<unsigned int, 1, cudaReadModeElementType> t1dn2;
+texture<unsigned int, 1, cudaReadModeElementType> t2up2;
+texture<unsigned int, 1, cudaReadModeElementType> t2dn2;
+texture<unsigned int, 1, cudaReadModeElementType> t3up2;
+texture<unsigned int, 1, cudaReadModeElementType> t3dn2; // the kernel copies all eight into shared memory when USE_SHARED is set
+
+
+#define RSTT(d0, d1, a, b0, b1, b2, b3, b4, b5, b6, b7)   do { /* fills t[d0] and t[d1] (t comes from the caller's scope) from eight words of a */ \
+		t[d0] = T0up(B32_0(a[b0])) /* byte 0 of a[b0], byte 1 of a[b1], byte 2 of a[b2], byte 3 of a[b3] ... */ \
+			^ T1up(B32_1(a[b1])) \
+			^ T2up(B32_2(a[b2])) \
+			^ T3up(B32_3(a[b3])) \
+			^ T0dn(B32_0(a[b4])) /* ... then the same byte positions of a[b4..b7] through the "dn" tables */ \
+			^ T1dn(B32_1(a[b5])) \
+			^ T2dn(B32_2(a[b6])) \
+			^ T3dn(B32_3(a[b7])); \
+		t[d1] = T0dn(B32_0(a[b0])) /* same bytes as above with the "up" and "dn" tables exchanged */ \
+			^ T1dn(B32_1(a[b1])) \
+			^ T2dn(B32_2(a[b2])) \
+			^ T3dn(B32_3(a[b3])) \
+			^ T0up(B32_0(a[b4])) \
+			^ T1up(B32_1(a[b5])) \
+			^ T2up(B32_2(a[b6])) \
+			^ T3up(B32_3(a[b7])); \
+	} while (0)
+
+
+extern uint32_t T0up_cpu[];
+extern uint32_t T0dn_cpu[];
+extern uint32_t T1up_cpu[];
+extern uint32_t T1dn_cpu[];
+extern uint32_t T2up_cpu[];
+extern uint32_t T2dn_cpu[];
+extern uint32_t T3up_cpu[];
+extern uint32_t T3dn_cpu[];
+
+
+__device__ __forceinline__ void groestl256_perm_P(uint32_t *a, uint32_t *mixtabs)
+{
+	// Ten rounds applied in place to the 16-word state a; mixtabs is the table base used by the T* accessor macros.
+        uint32_t t[16];
+	#pragma unroll 
+	for (int r = 0; r<10; r++)
+	{
+		// Round constants go into the even words only (the odd-word constant PC32dn is 0).
+		    a[0x0] ^= PC32up(0x00, r); 
+			a[0x2] ^= PC32up(0x10, r); 
+			a[0x4] ^= PC32up(0x20, r); 
+			a[0x6] ^= PC32up(0x30, r); 
+			a[0x8] ^= PC32up(0x40, r);
+			a[0xA] ^= PC32up(0x50, r); 
+			a[0xC] ^= PC32up(0x60, r); 
+			a[0xE] ^= PC32up(0x70, r); 
+			RSTT(0x0, 0x1, a, 0x0, 0x2, 0x4, 0x6, 0x9, 0xB, 0xD, 0xF); 
+			RSTT(0x2, 0x3, a, 0x2, 0x4, 0x6, 0x8, 0xB, 0xD, 0xF, 0x1); 
+			RSTT(0x4, 0x5, a, 0x4, 0x6, 0x8, 0xA, 0xD, 0xF, 0x1, 0x3); 
+			RSTT(0x6, 0x7, a, 0x6, 0x8, 0xA, 0xC, 0xF, 0x1, 0x3, 0x5); 
+			RSTT(0x8, 0x9, a, 0x8, 0xA, 0xC, 0xE, 0x1, 0x3, 0x5, 0x7); 
+			RSTT(0xA, 0xB, a, 0xA, 0xC, 0xE, 0x0, 0x3, 0x5, 0x7, 0x9); 
+			RSTT(0xC, 0xD, a, 0xC, 0xE, 0x0, 0x2, 0x5, 0x7, 0x9, 0xB); 
+			RSTT(0xE, 0xF, a, 0xE, 0x0, 0x2, 0x4, 0x7, 0x9, 0xB, 0xD); 
+		// RSTT wrote the new state into t; copy it back before the next round.
+#pragma unroll 
+		for (int k = 0; k<16; k++)
+			a[k] = t[k];
+
+	}
+}
+
+__device__ __forceinline__ void groestl256_perm_Pf( uint32_t *a, uint32_t *mixtabs)
+{
+	// Same rounds as groestl256_perm_P, but the tenth round is cut down to produce only a[14] and a[15].
+	uint32_t t[16];
+#pragma unroll 
+	for (int r = 0; r<9; r++)
+	{
+		a[0x0] ^= PC32up(0x00, r);
+		a[0x2] ^= PC32up(0x10, r);
+		a[0x4] ^= PC32up(0x20, r);
+		a[0x6] ^= PC32up(0x30, r);
+		a[0x8] ^= PC32up(0x40, r);
+		a[0xA] ^= PC32up(0x50, r);
+		a[0xC] ^= PC32up(0x60, r);
+		a[0xE] ^= PC32up(0x70, r);
+		RSTT(0x0, 0x1, a, 0x0, 0x2, 0x4, 0x6, 0x9, 0xB, 0xD, 0xF);
+		RSTT(0x2, 0x3, a, 0x2, 0x4, 0x6, 0x8, 0xB, 0xD, 0xF, 0x1);
+		RSTT(0x4, 0x5, a, 0x4, 0x6, 0x8, 0xA, 0xD, 0xF, 0x1, 0x3);
+		RSTT(0x6, 0x7, a, 0x6, 0x8, 0xA, 0xC, 0xF, 0x1, 0x3, 0x5);
+		RSTT(0x8, 0x9, a, 0x8, 0xA, 0xC, 0xE, 0x1, 0x3, 0x5, 0x7);
+		RSTT(0xA, 0xB, a, 0xA, 0xC, 0xE, 0x0, 0x3, 0x5, 0x7, 0x9);
+		RSTT(0xC, 0xD, a, 0xC, 0xE, 0x0, 0x2, 0x5, 0x7, 0x9, 0xB);
+		RSTT(0xE, 0xF, a, 0xE, 0x0, 0x2, 0x4, 0x7, 0x9, 0xB, 0xD);
+
+#pragma unroll 
+		for (int k = 0; k<16; k++)
+			a[k] = t[k];
+
+	}
+	a[0x0] ^= 0x09; // round 9 constants (PC32up(j, 9)), applied only to the even words the RSTT below reads
+	a[0x2] ^= 0x19;
+	a[0x4] ^= 0x29;
+	a[0xE] ^= 0x79;
+	RSTT(0xE, 0xF, a, 0xE, 0x0, 0x2, 0x4, 0x7, 0x9, 0xB, 0xD);
+	    a[14] = t[14];
+		a[15] = t[15];
+	// a[0..13] are left as modified above and are not a complete round output.
+}
+
+__device__ __forceinline__ void groestl256_perm_Q(uint32_t *a, uint32_t *mixtabs)
+{
+		uint32_t t[16]; // next-round state written by RSTT
+	#pragma unroll 
+	for (int r = 0; r<10; r++)
+	{
+		    a[0x0] = ~a[0x0]; // even words are inverted (same as XOR with QC32up = 0xFFFFFFFF)
+			a[0x1] ^= QC32dn(0x00, r); // odd words take the round-dependent constant
+			a[0x2] = ~a[0x2];
+			a[0x3] ^= QC32dn(0x10, r); 
+			a[0x4] = ~a[0x4];
+			a[0x5] ^= QC32dn(0x20, r); 
+			a[0x6] = ~a[0x6];
+			a[0x7] ^= QC32dn(0x30, r); 
+			a[0x8] = ~a[0x8];
+			a[0x9] ^= QC32dn(0x40, r); 
+			a[0xA] = ~a[0xA];
+			a[0xB] ^= QC32dn(0x50, r); 
+			a[0xC] = ~a[0xC];
+			a[0xD] ^= QC32dn(0x60, r); 
+			a[0xE] = ~a[0xE];
+			a[0xF] ^= QC32dn(0x70, r); 
+			RSTT(0x0, 0x1, a, 0x2, 0x6, 0xA, 0xE, 0x1, 0x5, 0x9, 0xD); 
+			RSTT(0x2, 0x3, a, 0x4, 0x8, 0xC, 0x0, 0x3, 0x7, 0xB, 0xF); 
+			RSTT(0x4, 0x5, a, 0x6, 0xA, 0xE, 0x2, 0x5, 0x9, 0xD, 0x1); 
+			RSTT(0x6, 0x7, a, 0x8, 0xC, 0x0, 0x4, 0x7, 0xB, 0xF, 0x3); 
+			RSTT(0x8, 0x9, a, 0xA, 0xE, 0x2, 0x6, 0x9, 0xD, 0x1, 0x5); 
+			RSTT(0xA, 0xB, a, 0xC, 0x0, 0x4, 0x8, 0xB, 0xF, 0x3, 0x7); 
+			RSTT(0xC, 0xD, a, 0xE, 0x2, 0x6, 0xA, 0xD, 0x1, 0x5, 0x9); 
+			RSTT(0xE, 0xF, a, 0x0, 0x4, 0x8, 0xC, 0xF, 0x3, 0x7, 0xB); 
+		// copy the freshly computed words back into the state
+#pragma unroll 
+		for (int k = 0; k<16; k++)
+			a[k] = t[k];
+
+
+	}
+}
+
+__global__ __launch_bounds__(256,1) void groestl256_gpu_hash32(int threads, uint32_t startNounce, uint64_t *outputHash, uint32_t *nonceVector)
+{
+	// One thread per 32-byte input hash; writes startNounce + thread to nonceVector[0] when the result meets pTarget.
+#if USE_SHARED
+  extern __shared__ uint32_t mixtabs[];
+	// Stage the eight 256-entry tables; the block-stride loop fills every entry for any block size up to 256.
+	for (int i = threadIdx.x; i < 256; i += blockDim.x)
+	{
+		mixtabs[i] = tex1Dfetch(t0up2, i);
+		mixtabs[256 + i] = tex1Dfetch(t0dn2, i);
+		mixtabs[512 + i] = tex1Dfetch(t1up2, i);
+		mixtabs[768 + i] = tex1Dfetch(t1dn2, i);
+		mixtabs[1024 + i] = tex1Dfetch(t2up2, i);
+		mixtabs[1280 + i] = tex1Dfetch(t2dn2, i);
+		mixtabs[1536 + i] = tex1Dfetch(t3up2, i);
+		mixtabs[1792 + i] = tex1Dfetch(t3dn2, i);
+	}
+	// The barrier sits outside any per-thread branch so every thread reaches it before the tables are read.
+	__syncthreads();
+#endif
+
+	// Threads past the end of the batch helped load the tables above but hash nothing.
+	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	if (thread < threads)
+	{
+		// GROESTL: message[0..7] is the input hash, message[8..15] is fixed padding.
+		uint32_t message[16];
+		uint32_t state[16];
+		// state is the working copy passed to perm_P; message is kept for perm_Q.
+		uint32_t nonce = startNounce +thread;
+		// Inputs are word-major: 64-bit word k of this thread sits at outputHash[k*threads + thread].
+#pragma unroll 
+		for (int k = 0; k<4; k++) LOHI(message[2*k],message[2*k+1],outputHash[k*threads+thread]); // NOTE(review): LOHI presumably splits a 64-bit word into low/high halves - confirm in cuda_helper.h
+#pragma unroll 
+		for (int k = 9; k<15; k++)
+			message[k] = 0;
+
+		message[8] = 0x80;
+		message[15] = 0x01000000;
+
+#pragma unroll 16
+		for (int u = 0; u<16; u++) state[u] = message[u];
+		state[15] ^= 0x10000; // NOTE(review): presumably folds the initial chaining value into the state - confirm
+
+		// Perm: state becomes P(state) with the same 0x10000 word XORed back in, message becomes Q(message)
+
+#if USE_SHARED
+
+		groestl256_perm_P(state, mixtabs);
+		state[15] ^= 0x10000;
+		groestl256_perm_Q(message, mixtabs);
+
+#else
+		groestl256_perm_P(state, NULL);
+		state[15] ^= 0x10000;
+		groestl256_perm_Q(message, NULL);
+#endif
+#pragma unroll 16
+		for (int u = 0; u<16; u++) state[u] ^= message[u];
+		message[14] = state[14]; // keep the two words that are XORed back in after the final permutation
+		message[15] = state[15];
+
+#if USE_SHARED
+		groestl256_perm_Pf(state, mixtabs);
+#else
+		groestl256_perm_Pf(state, NULL);
+#endif
+		state[14] ^= message[14];
+		state[15] ^= message[15];
+		// Only words 14/15 (64-bit word 7) are compared; several threads may store, and the last store wins.
+		if (((uint64_t*)state)[7] <= ((uint64_t*)pTarget)[3]) { nonceVector[0] = nonce; }
+	}
+}
+
+
+#define texDef(texname, texmem, texsource, texsize) /* declares texmem, uploads texsize bytes of texsource to it and binds it to texname */ \
+	unsigned int *texmem; /* device copy of the table; declared in the enclosing scope and never freed here */ \
+	cudaMalloc(&texmem, texsize); \
+	cudaMemcpy(texmem, texsource, texsize, cudaMemcpyHostToDevice); \
+	texname.normalized = 0; /* integer element coordinates, point sampling, clamped addressing */ \
+	texname.filterMode = cudaFilterModePoint; \
+	texname.addressMode[0] = cudaAddressModeClamp; \
+	{ cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<unsigned int>(); \
+	  cudaBindTexture(NULL, &texname, texmem, &channelDesc, texsize ); } /* no trailing backslash: the macro ends on this line */
+
+
+
+
+
+   
+void groestl256_cpu_init(int thr_id, int threads)
+{
+	// Uploads the eight lookup tables and allocates the nonce slots for thr_id; `threads` is unused.
+	// Initialize the textures with the texDef macro; each use also declares the local d_T* pointer named in its second argument.
+	texDef(t0up2, d_T0up, T0up_cpu, sizeof(uint32_t) * 256);
+	texDef(t0dn2, d_T0dn, T0dn_cpu, sizeof(uint32_t) * 256);
+	texDef(t1up2, d_T1up, T1up_cpu, sizeof(uint32_t) * 256);
+	texDef(t1dn2, d_T1dn, T1dn_cpu, sizeof(uint32_t) * 256);
+	texDef(t2up2, d_T2up, T2up_cpu, sizeof(uint32_t) * 256);
+	texDef(t2dn2, d_T2dn, T2dn_cpu, sizeof(uint32_t) * 256);
+	texDef(t3up2, d_T3up, T3up_cpu, sizeof(uint32_t) * 256);
+	texDef(t3dn2, d_T3dn, T3dn_cpu, sizeof(uint32_t) * 256);
+	// One device word the kernel writes a found nonce to, and one cudaMallocHost word to read it back into.
+	cudaMalloc(&d_GNonce[thr_id], sizeof(uint32_t)); 
+	cudaMallocHost(&d_gnounce[thr_id], 1*sizeof(uint32_t));
+} 
+
+
+__host__ uint32_t groestl256_cpu_hash_32(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order)
+{
+	/*
+	 * Runs groestl256_gpu_hash32 over the `threads` hashes stored in d_outputHash and
+	 * returns the nonce reported by the kernel, or 0xffffffff when no thread
+	 * wrote one.
+	 */
+	uint32_t *const devNonce = d_GNonce[thr_id];   // device-side result word
+	uint32_t *const hostNonce = d_gnounce[thr_id]; // host buffer from cudaMallocHost
+	const int tpb = 256;
+#if USE_SHARED
+	// room for the eight 256-entry lookup tables the kernel stages in shared memory
+	const size_t smemBytes = 8 * 256 * sizeof(uint32_t);
+#else
+	const size_t smemBytes = 0;
+#endif
+	// all-ones marks "nothing found"
+	cudaMemset(devNonce, 0xff, sizeof(uint32_t));
+
+	// number of thread blocks needed to cover every hash (rounded up)
+	const dim3 launchBlock(tpb);
+	const dim3 launchGrid((threads + tpb - 1) / tpb);
+	groestl256_gpu_hash32<<<launchGrid, launchBlock, smemBytes>>>(threads, startNounce, d_outputHash, devNonce);
+
+	MyStreamSynchronize(NULL, order, thr_id);
+	cudaMemcpy(hostNonce, devNonce, sizeof(uint32_t), cudaMemcpyDeviceToHost);
+	return *hostNonce;
+}
+
+
+__host__ void groestl256_setTarget(const void *pTargetIn)
+{	// Copies the eight 32-bit target words at pTargetIn into the device constant pTarget.
+	cudaMemcpyToSymbol(pTarget, pTargetIn, sizeof(uint32_t) * 8, 0, cudaMemcpyHostToDevice);
+}
\ No newline at end of file
diff --git a/Algo256/cuda_keccak256.cu b/Algo256/cuda_keccak256.cu
new file mode 100644
index 0000000000..b24263b4b9
--- /dev/null
+++ b/Algo256/cuda_keccak256.cu
@@ -0,0 +1,327 @@
+
+#include <cuda.h>
+#include "cuda_runtime.h"
+#include "device_launch_parameters.h"
+
+
+#include <stdio.h>
+#include <stdint.h>
+#include <memory.h>
+
+
+extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
+extern int compute_version[8];
+#include "cuda_helper.h"
+
+static const uint64_t host_keccak_round_constants[24] = { // one constant per round; uploaded to keccak_round_constants by keccak256_cpu_init
+    0x0000000000000001ull, 0x0000000000008082ull,
+    0x800000000000808aull, 0x8000000080008000ull,
+    0x000000000000808bull, 0x0000000080000001ull,
+    0x8000000080008081ull, 0x8000000000008009ull,
+    0x000000000000008aull, 0x0000000000000088ull,
+    0x0000000080008009ull, 0x000000008000000aull,
+    0x000000008000808bull, 0x800000000000008bull,
+    0x8000000000008089ull, 0x8000000000008003ull,
+    0x8000000000008002ull, 0x8000000000000080ull,
+    0x000000000000800aull, 0x800000008000000aull,
+    0x8000000080008081ull, 0x8000000000008080ull,
+    0x0000000080000001ull, 0x8000000080008008ull
+};
+uint32_t *d_nounce[8]; // per-thr_id host word (cudaMallocHost) the found nonce is copied into
+uint32_t *d_KNonce[8]; // per-thr_id device word the 80-byte kernel writes a found nonce to
+__constant__ uint32_t pTarget[8]; // eight target words set by keccak256_setBlock_80
+__constant__ uint64_t keccak_round_constants[24]; // device copy of host_keccak_round_constants
+
+// The array below is exactly 80 bytes; the kernels add the padding words themselves.
+ __constant__ uint64_t c_PaddedMessage80[10]; // the 80-byte block header as ten 64-bit words
+
+
+static __device__ __forceinline__ void keccak_blockv35(uint2 *s, const uint64_t *keccak_round_constants) {
+	size_t i;
+	uint2 t[5], u[5], v, w;
+	// Applies 24 rounds in place to the 25-lane state s; each 64-bit lane is held as a uint2
+	// (callers in this file build lanes with x = low word, y = high word).
+	// The parameter keccak_round_constants shadows the file-level constant of the same name.
+    #pragma unroll
+	for (i = 0; i < 24; i++) {
+		/* theta: c = a[0,i] ^ a[1,i] ^ .. a[4,i] */
+		t[0] = s[0] ^ s[5] ^ s[10] ^ s[15] ^ s[20];
+		t[1] = s[1] ^ s[6] ^ s[11] ^ s[16] ^ s[21];
+		t[2] = s[2] ^ s[7] ^ s[12] ^ s[17] ^ s[22];
+		t[3] = s[3] ^ s[8] ^ s[13] ^ s[18] ^ s[23];
+		t[4] = s[4] ^ s[9] ^ s[14] ^ s[19] ^ s[24];
+
+		/* theta: d[i] = c[i+4] ^ rotl(c[i+1],1) */
+		u[0] = t[4] ^ ROL2(t[1], 1);
+		u[1] = t[0] ^ ROL2(t[2], 1);
+		u[2] = t[1] ^ ROL2(t[3], 1);
+		u[3] = t[2] ^ ROL2(t[4], 1);
+		u[4] = t[3] ^ ROL2(t[0], 1);
+
+		/* theta: a[0,i], a[1,i], .. a[4,i] ^= d[i] */
+		s[0] ^= u[0]; s[5] ^= u[0]; s[10] ^= u[0]; s[15] ^= u[0]; s[20] ^= u[0];
+		s[1] ^= u[1]; s[6] ^= u[1]; s[11] ^= u[1]; s[16] ^= u[1]; s[21] ^= u[1];
+		s[2] ^= u[2]; s[7] ^= u[2]; s[12] ^= u[2]; s[17] ^= u[2]; s[22] ^= u[2];
+		s[3] ^= u[3]; s[8] ^= u[3]; s[13] ^= u[3]; s[18] ^= u[3]; s[23] ^= u[3];
+		s[4] ^= u[4]; s[9] ^= u[4]; s[14] ^= u[4]; s[19] ^= u[4]; s[24] ^= u[4];
+
+		/* rho pi: b[..] = rotl(a[..], ..) */
+		v = s[1]; // saved because s[1] is overwritten first and consumed last in this chain
+		s[1] = ROL2(s[6], 44);
+		s[6] = ROL2(s[9], 20);
+		s[9] = ROL2(s[22], 61);
+		s[22] = ROL2(s[14], 39);
+		s[14] = ROL2(s[20], 18);
+		s[20] = ROL2(s[2], 62);
+		s[2] = ROL2(s[12], 43);
+		s[12] = ROL2(s[13], 25);
+		s[13] = ROL2(s[19], 8);
+		s[19] = ROL2(s[23], 56);
+		s[23] = ROL2(s[15], 41);
+		s[15] = ROL2(s[4], 27);
+		s[4] = ROL2(s[24], 14);
+		s[24] = ROL2(s[21], 2);
+		s[21] = ROL2(s[8], 55);
+		s[8] = ROL2(s[16], 45);
+		s[16] = ROL2(s[5], 36);
+		s[5] = ROL2(s[3], 28);
+		s[3] = ROL2(s[18], 21);
+		s[18] = ROL2(s[17], 15);
+		s[17] = ROL2(s[11], 10);
+		s[11] = ROL2(s[7], 6);
+		s[7] = ROL2(s[10], 3);
+		s[10] = ROL2(v, 1);
+
+		/* chi: a[i,j] ^= ~b[i,j+1] & b[i,j+2] */
+		v = s[0]; w = s[1]; s[0] ^= (~w) & s[2]; s[1] ^= (~s[2]) & s[3]; s[2] ^= (~s[3]) & s[4]; s[3] ^= (~s[4]) & v; s[4] ^= (~v) & w;
+		v = s[5]; w = s[6]; s[5] ^= (~w) & s[7]; s[6] ^= (~s[7]) & s[8]; s[7] ^= (~s[8]) & s[9]; s[8] ^= (~s[9]) & v; s[9] ^= (~v) & w;
+		v = s[10]; w = s[11]; s[10] ^= (~w) & s[12]; s[11] ^= (~s[12]) & s[13]; s[12] ^= (~s[13]) & s[14]; s[13] ^= (~s[14]) & v; s[14] ^= (~v) & w;
+		v = s[15]; w = s[16]; s[15] ^= (~w) & s[17]; s[16] ^= (~s[17]) & s[18]; s[17] ^= (~s[18]) & s[19]; s[18] ^= (~s[19]) & v; s[19] ^= (~v) & w;
+		v = s[20]; w = s[21]; s[20] ^= (~w) & s[22]; s[21] ^= (~s[22]) & s[23]; s[22] ^= (~s[23]) & s[24]; s[23] ^= (~s[24]) & v; s[24] ^= (~v) & w;
+
+		/* iota: a[0,0] ^= round constant */
+		s[0] ^= vectorize(keccak_round_constants[i]);
+	}
+}
+
+static __device__ __forceinline__ void keccak_blockv30(uint64_t *s, const uint64_t *keccak_round_constants) {
+	size_t i;
+	uint64_t t[5], u[5], v, w;
+	// Same 24 rounds as keccak_blockv35, on plain 64-bit lanes with ROTL64 instead of ROL2.
+#pragma unroll
+
+	// the parameter keccak_round_constants shadows the file-level constant of the same name
+	for (i = 0; i < 24; i++) {
+		/* theta: c = a[0,i] ^ a[1,i] ^ .. a[4,i] */
+		t[0] = s[0] ^ s[5] ^ s[10] ^ s[15] ^ s[20];
+		t[1] = s[1] ^ s[6] ^ s[11] ^ s[16] ^ s[21];
+		t[2] = s[2] ^ s[7] ^ s[12] ^ s[17] ^ s[22];
+		t[3] = s[3] ^ s[8] ^ s[13] ^ s[18] ^ s[23];
+		t[4] = s[4] ^ s[9] ^ s[14] ^ s[19] ^ s[24];
+
+		/* theta: d[i] = c[i+4] ^ rotl(c[i+1],1) */
+		u[0] = t[4] ^ ROTL64(t[1], 1);
+		u[1] = t[0] ^ ROTL64(t[2], 1);
+		u[2] = t[1] ^ ROTL64(t[3], 1);
+		u[3] = t[2] ^ ROTL64(t[4], 1);
+		u[4] = t[3] ^ ROTL64(t[0], 1);
+
+		/* theta: a[0,i], a[1,i], .. a[4,i] ^= d[i] */
+		s[0] ^= u[0]; s[5] ^= u[0]; s[10] ^= u[0]; s[15] ^= u[0]; s[20] ^= u[0];
+		s[1] ^= u[1]; s[6] ^= u[1]; s[11] ^= u[1]; s[16] ^= u[1]; s[21] ^= u[1];
+		s[2] ^= u[2]; s[7] ^= u[2]; s[12] ^= u[2]; s[17] ^= u[2]; s[22] ^= u[2];
+		s[3] ^= u[3]; s[8] ^= u[3]; s[13] ^= u[3]; s[18] ^= u[3]; s[23] ^= u[3];
+		s[4] ^= u[4]; s[9] ^= u[4]; s[14] ^= u[4]; s[19] ^= u[4]; s[24] ^= u[4];
+
+		/* rho pi: b[..] = rotl(a[..], ..) */
+		v = s[1]; // saved because s[1] is overwritten first and consumed last in this chain
+		s[1] = ROTL64(s[6], 44);
+		s[6] = ROTL64(s[9], 20);
+		s[9] = ROTL64(s[22], 61);
+		s[22] = ROTL64(s[14], 39);
+		s[14] = ROTL64(s[20], 18);
+		s[20] = ROTL64(s[2], 62);
+		s[2] = ROTL64(s[12], 43);
+		s[12] = ROTL64(s[13], 25);
+		s[13] = ROTL64(s[19], 8);
+		s[19] = ROTL64(s[23], 56);
+		s[23] = ROTL64(s[15], 41);
+		s[15] = ROTL64(s[4], 27);
+		s[4] = ROTL64(s[24], 14);
+		s[24] = ROTL64(s[21], 2);
+		s[21] = ROTL64(s[8], 55);
+		s[8] = ROTL64(s[16], 45);
+		s[16] = ROTL64(s[5], 36);
+		s[5] = ROTL64(s[3], 28);
+		s[3] = ROTL64(s[18], 21);
+		s[18] = ROTL64(s[17], 15);
+		s[17] = ROTL64(s[11], 10);
+		s[11] = ROTL64(s[7], 6);
+		s[7] = ROTL64(s[10], 3);
+		s[10] = ROTL64(v, 1);
+
+		/* chi: a[i,j] ^= ~b[i,j+1] & b[i,j+2] */
+		v = s[0]; w = s[1]; s[0] ^= (~w) & s[2]; s[1] ^= (~s[2]) & s[3]; s[2] ^= (~s[3]) & s[4]; s[3] ^= (~s[4]) & v; s[4] ^= (~v) & w;
+		v = s[5]; w = s[6]; s[5] ^= (~w) & s[7]; s[6] ^= (~s[7]) & s[8]; s[7] ^= (~s[8]) & s[9]; s[8] ^= (~s[9]) & v; s[9] ^= (~v) & w;
+		v = s[10]; w = s[11]; s[10] ^= (~w) & s[12]; s[11] ^= (~s[12]) & s[13]; s[12] ^= (~s[13]) & s[14]; s[13] ^= (~s[14]) & v; s[14] ^= (~v) & w;
+		v = s[15]; w = s[16]; s[15] ^= (~w) & s[17]; s[16] ^= (~s[17]) & s[18]; s[17] ^= (~s[18]) & s[19]; s[18] ^= (~s[19]) & v; s[19] ^= (~v) & w;
+		v = s[20]; w = s[21]; s[20] ^= (~w) & s[22]; s[21] ^= (~s[22]) & s[23]; s[22] ^= (~s[23]) & s[24]; s[23] ^= (~s[24]) & v; s[24] ^= (~v) & w;
+
+		/* iota: a[0,0] ^= round constant */
+		s[0] ^= keccak_round_constants[i];
+	}
+}
+
+__global__ void __launch_bounds__(256,3) keccak256_gpu_hash_80(int threads, uint32_t startNounce, void *outputHash, uint32_t *resNounce)
+{
+	// One thread per nonce: hashes the 80-byte header with the nonce patched in and reports a hit in resNounce[0]; outputHash is not used.
+    int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+    if (thread < threads)
+    {
+        // the nonce replaces the high 32 bits of header word 9 (after cuda_swab32)
+		uint32_t nounce = startNounce + thread;
+#if __CUDA_ARCH__ >= 350
+		// uint2 lane variant (x = low word, y = high word)
+           uint2 keccak_gpu_state[25];
+           #pragma unroll 25
+           for (int i=0; i<25; i++) {
+			if(i<9) {keccak_gpu_state[i] = vectorize(c_PaddedMessage80[i]);}
+			else    {keccak_gpu_state[i] = make_uint2(0,0);}}
+		   keccak_gpu_state[9]= vectorize(c_PaddedMessage80[9]);
+		   keccak_gpu_state[9].y = cuda_swab32(nounce);
+           keccak_gpu_state[10]=make_uint2(1,0); // padding: lowest bit of lane 10, right after the ten message lanes
+		   keccak_gpu_state[16]=make_uint2(0,0x80000000); // padding: highest bit of lane 16
+           keccak_blockv35(keccak_gpu_state,keccak_round_constants);
+		// several threads may store; the last store wins
+			if (devectorize(keccak_gpu_state[3]) <= ((uint64_t*)pTarget)[3]) {resNounce[0] = nounce;}		
+
+#else 
+		// 64-bit lane variant with the same state layout
+			uint64_t keccak_gpu_state[25];
+            #pragma unroll 25
+			for (int i = 0; i<25; i++) {
+				if (i<9) { keccak_gpu_state[i] = c_PaddedMessage80[i]; }
+				else    { keccak_gpu_state[i] = 0; }
+			}
+			keccak_gpu_state[9] = REPLACE_HIWORD(c_PaddedMessage80[9], cuda_swab32(nounce));
+			keccak_gpu_state[10] = 0x0000000000000001;
+			keccak_gpu_state[16] = 0x8000000000000000;
+
+			keccak_blockv30(keccak_gpu_state, keccak_round_constants);
+			if (keccak_gpu_state[3] <= ((uint64_t*)pTarget)[3]) { resNounce[0] = nounce; }
+#endif
+
+
+	} //thread
+}
+
+__global__ void __launch_bounds__(256,3) keccak256_gpu_hash_32(int threads, uint32_t startNounce, uint64_t *outputHash)
+{
+	// Re-hashes each thread's four 64-bit words of outputHash in place (uint2 lane variant); startNounce is not used.
+	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	if (thread < threads)
+	{
+		// Data is word-major: word i of this thread sits at outputHash[i*threads + thread].
+
+		uint2 keccak_gpu_state[25];
+#pragma unroll 25
+		for (int i = 0; i<25; i++) {
+			if (i<4) { keccak_gpu_state[i] = vectorize(outputHash[i*threads+thread]); }
+			else    { keccak_gpu_state[i] =  make_uint2(0, 0 ); }
+		}
+		keccak_gpu_state[4] = make_uint2( 1, 0 ); // padding: lowest bit of lane 4, right after the four message lanes
+		keccak_gpu_state[16] = make_uint2( 0, 0x80000000); // padding: highest bit of lane 16
+		keccak_blockv35(keccak_gpu_state, keccak_round_constants);
+
+#pragma unroll 4
+		for (int i=0; i<4;i++) {
+outputHash[i*threads+thread]=devectorize(keccak_gpu_state[i]);} 
+
+
+	} //thread
+}
+
+  
+__global__ void __launch_bounds__(256, 3) keccak256_gpu_hash_32_v30(int threads, uint32_t startNounce, uint64_t *outputHash)
+{
+	// 64-bit lane counterpart of keccak256_gpu_hash_32: same layout, same padding, in-place result; startNounce is not used.
+	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	if (thread < threads)
+	{
+		// Data is word-major: word i of this thread sits at outputHash[i*threads + thread].
+		uint64_t keccak_gpu_state[25];
+#pragma unroll 25
+		for (int i = 0; i<25; i++) {
+			if (i<4) { keccak_gpu_state[i] = outputHash[i*threads + thread]; }
+			else    { keccak_gpu_state[i] = 0; }
+		}
+		keccak_gpu_state[4] = 0x0000000000000001; // padding: lowest bit of lane 4
+		keccak_gpu_state[16] = 0x8000000000000000; // padding: highest bit of lane 16
+
+		keccak_blockv30(keccak_gpu_state, keccak_round_constants);
+#pragma unroll 4
+		for (int i = 0; i<4; i++) { outputHash[i*threads + thread] = keccak_gpu_state[i]; }
+
+	} //thread
+}
+
+
+ 
+void keccak256_cpu_init(int thr_id, int threads)
+{
+	// Upload the 24 round constants, then reserve this thr_id's nonce slots
+	// (one device word for the kernel, one cudaMallocHost word to read it back).
+	cudaMemcpyToSymbol(keccak_round_constants, host_keccak_round_constants, sizeof(uint64_t) * 24, 0, cudaMemcpyHostToDevice);
+	cudaMalloc(&d_KNonce[thr_id], sizeof(*d_KNonce[thr_id]));
+	cudaMallocHost(&d_nounce[thr_id], sizeof(*d_nounce[thr_id]));
+}
+
+__host__ uint32_t keccak256_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_outputHash, int order)
+{
+	// Scans `threads` nonces starting at startNounce with keccak256_gpu_hash_80 and returns
+	// the nonce the kernel reported, or 0xffffffff when none met the target.
+	uint32_t result = 0xffffffff;
+	cudaMemset(d_KNonce[thr_id], 0xff, sizeof(uint32_t)); // all-ones marks "nothing found"
+	const int threadsperblock = 256;
+
+	// compute how many thread blocks we need (rounded up)
+	dim3 grid((threads + threadsperblock-1)/threadsperblock);
+	dim3 block(threadsperblock);
+	size_t shared_size = 0; // no dynamic shared memory is requested
+
+	keccak256_gpu_hash_80<<<grid, block, shared_size>>>(threads, startNounce, d_outputHash, d_KNonce[thr_id]);
+
+	MyStreamSynchronize(NULL, order, thr_id);
+	cudaMemcpy(d_nounce[thr_id], d_KNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost);
+	cudaDeviceSynchronize(); // replaces the deprecated cudaThreadSynchronize()
+	result = *d_nounce[thr_id];
+	return result;
+}
+
+__host__ void keccak256_cpu_hash_32(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order)
+{
+	/*
+	 * Re-hashes the `threads` 32-byte entries of d_outputHash in place, picking the
+	 * uint2-based kernel when compute_version[thr_id] is at least 35 and the
+	 * 64-bit kernel otherwise.
+	 */
+	const int tpb = 256;
+	const size_t smemBytes = 0;
+	const dim3 launchBlock(tpb);
+	const dim3 launchGrid((threads + tpb - 1) / tpb); // thread blocks needed, rounded up
+
+	if (compute_version[thr_id] < 35)
+		keccak256_gpu_hash_32_v30<<<launchGrid, launchBlock, smemBytes>>>(threads, startNounce, d_outputHash);
+	else
+		keccak256_gpu_hash_32<<<launchGrid, launchBlock, smemBytes>>>(threads, startNounce, d_outputHash);
+
+	MyStreamSynchronize(NULL, order, thr_id);
+}
+
+__host__ void keccak256_setBlock_80(void *pdata,const void *pTargetIn)
+{	// Publishes the 8-word target and a private copy of the 80-byte header to constant memory.
+	uint64_t header[10];
+	memcpy(header, pdata, sizeof(header));
+	cudaMemcpyToSymbol(pTarget, pTargetIn, sizeof(uint32_t) * 8, 0, cudaMemcpyHostToDevice);
+	cudaMemcpyToSymbol(c_PaddedMessage80, header, sizeof(header), 0, cudaMemcpyHostToDevice);
+}
\ No newline at end of file
diff --git a/Algo256/cuda_skein256.cu b/Algo256/cuda_skein256.cu
new file mode 100644
index 0000000000..82cdb96e6b
--- /dev/null
+++ b/Algo256/cuda_skein256.cu
@@ -0,0 +1,311 @@
+
+#include <cuda.h>
+#include "cuda_runtime.h"
+#include "device_launch_parameters.h"
+
+
+#include <stdio.h>
+#include <stdint.h>
+#include <memory.h>
+
+
+extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
+extern int compute_version[8];
+#include "cuda_helper.h"
+
+static __constant__ uint64_t SKEIN_IV512_256[8] = { // initial chaining words h[0..7] in 64-bit form (read by skein256_gpu_hash_32_v30)
+	0xCCD044A12FDB3E13, 0xE83590301A79A9EB,
+	0x55AEA0614F816E6F, 0x2A2767A4AE9B94DB,
+	0xEC06025E74DD7683, 0xE7A436CDC4746251,
+	0xC36FBAF9393AD185, 0x3EEDBA1833EDFC13
+};
+// The same eight words, each split into a uint2 {low 32 bits, high 32 bits} (read by skein256_gpu_hash_32).
+static __constant__ uint2 vSKEIN_IV512_256[8] = {
+	{0x2FDB3E13, 0xCCD044A1 }, 
+    {0x1A79A9EB, 0xE8359030 },
+	{0x4F816E6F, 0x55AEA061 }, 
+    {0xAE9B94DB, 0x2A2767A4 },
+	{0x74DD7683, 0xEC06025E }, 
+    {0xC4746251, 0xE7A436CD },
+	{0x393AD185, 0xC36FBAF9 }, 
+    {0x33EDFC13, 0x3EEDBA18 }
+};
+// Left-rotation amounts: row ROT256[r] holds the four counts used by Round512v35 / Round512v30 when called with ROT == r.
+static __constant__ int ROT256[8][4] =
+{
+	46,36, 19, 37,
+	33,27, 14, 42,
+	17,49, 36, 39,
+	44, 9, 54, 56,
+	39,30, 34, 24,
+	13,50, 10, 17,
+	25,29, 39, 43,
+	8, 35, 56, 22,
+};
+// Constant that the kernels XOR with h[0..7] to form h[8]; given as uint2 {low, high} and as one 64-bit value.
+static __constant__ uint2 skein_ks_parity = { 0xA9FC1A22,0x1BD11BDA};
+static __constant__ uint64_t skein_ks_parity64 = 0x1BD11BDAA9FC1A22;
+static __constant__ uint2 t12[6] = // tweak words as {low, high}: entries 0-2 are loaded for the first pass, 3-5 for the second
+{ 
+{ 0x20, 0 },
+{ 0, 0xf0000000 },
+{ 0x20, 0xf0000000 },
+{ 0x08, 0 },
+{ 0, 0xff000000 },
+{ 0x08, 0xff000000 }
+};
+// 64-bit forms of the t12 entries; skein256_gpu_hash_32_v30 takes entries 3-5 from this table.
+static __constant__ uint64_t t12_30[6] =
+{ 0x20,
+0xf000000000000000,
+0xf000000000000020,
+0x08,
+0xff00000000000000,
+0xff00000000000008
+};
+
+
+static __forceinline__ __device__ void Round512v35(uint2 &p0, uint2 &p1, uint2 &p2, uint2 &p3, uint2 &p4, uint2 &p5, uint2 &p6, uint2 &p7, int ROT)
+{
+	// Four add / rotate-left / xor mixes on the pairs (p0,p1), (p2,p3), (p4,p5), (p6,p7), using row ROT of ROT256.
+	const int r0 = ROT256[ROT][0], r1 = ROT256[ROT][1], r2 = ROT256[ROT][2], r3 = ROT256[ROT][3];
+	p0 += p1;  p1 = ROL2(p1, r0);  p1 ^= p0;      p2 += p3;  p3 = ROL2(p3, r1);  p3 ^= p2;
+	p4 += p5;  p5 = ROL2(p5, r2);  p5 ^= p4;      p6 += p7;  p7 = ROL2(p7, r3);  p7 ^= p6;
+}
+
+
+static __forceinline__ __device__ void Round_8_512v35(uint2 *ks,uint2 *ts,uint2 &p0, uint2 &p1, uint2 &p2, uint2 &p3, uint2 &p4, 
+                                                                          uint2 &p5, uint2 &p6, uint2 &p7, int R)
+{
+	// Two groups of four mix rounds (rotation rows 0-3, then 4-7). After each group a subkey is added:
+	// ks[] indexed mod 9 into every word, ts[] indexed mod 3 into p5/p6, and the subkey number into p7.
+	const int s0 = R, s1 = R + 1;   // subkey numbers for the first and second injection
+	Round512v35(p0, p1, p2, p3, p4, p5, p6, p7, 0);
+	Round512v35(p2, p1, p4, p7, p6, p5, p0, p3, 1);
+	Round512v35(p4, p1, p6, p3, p0, p5, p2, p7, 2);
+	Round512v35(p6, p1, p0, p7, p2, p5, p4, p3, 3);
+	p0 += ks[(s0 + 0) % 9];   p1 += ks[(s0 + 1) % 9];
+	p2 += ks[(s0 + 2) % 9];   p3 += ks[(s0 + 3) % 9];
+	p4 += ks[(s0 + 4) % 9];
+	p5 += ks[(s0 + 5) % 9] + ts[(s0 + 0) % 3];
+	p6 += ks[(s0 + 6) % 9] + ts[(s0 + 1) % 3];
+	p7 += ks[(s0 + 7) % 9] + make_uint2(s0, 0);
+
+	Round512v35(p0, p1, p2, p3, p4, p5, p6, p7, 4);
+	Round512v35(p2, p1, p4, p7, p6, p5, p0, p3, 5);
+	Round512v35(p4, p1, p6, p3, p0, p5, p2, p7, 6);
+	Round512v35(p6, p1, p0, p7, p2, p5, p4, p3, 7);
+	p0 += ks[(s1 + 0) % 9];   p1 += ks[(s1 + 1) % 9];
+	p2 += ks[(s1 + 2) % 9];   p3 += ks[(s1 + 3) % 9];
+	p4 += ks[(s1 + 4) % 9];
+	p5 += ks[(s1 + 5) % 9] + ts[(s1 + 0) % 3];
+	p6 += ks[(s1 + 6) % 9] + ts[(s1 + 1) % 3];
+	p7 += ks[(s1 + 7) % 9] + make_uint2(s1, 0);
+}
+
+
+static __forceinline__ __device__ void Round512v30(uint64_t &p0, uint64_t &p1, uint64_t &p2, uint64_t &p3, uint64_t &p4, uint64_t &p5, uint64_t &p6, uint64_t &p7, int ROT)
+{
+	// 64-bit variant: four add / rotate-left / xor mixes on the pairs (p0,p1), (p2,p3), (p4,p5), (p6,p7), using row ROT of ROT256.
+	const int r0 = ROT256[ROT][0], r1 = ROT256[ROT][1], r2 = ROT256[ROT][2], r3 = ROT256[ROT][3];
+	p0 += p1;  p1 = ROTL64(p1, r0);  p1 ^= p0;      p2 += p3;  p3 = ROTL64(p3, r1);  p3 ^= p2;
+	p4 += p5;  p5 = ROTL64(p5, r2);  p5 ^= p4;      p6 += p7;  p7 = ROTL64(p7, r3);  p7 ^= p6;
+}
+
+static __forceinline__ __device__ void Round_8_512v30(uint64_t *ks, uint64_t *ts, uint64_t &p0, uint64_t &p1, uint64_t &p2, uint64_t &p3, uint64_t &p4,
+	uint64_t &p5, uint64_t &p6, uint64_t &p7, int R)
+{
+	// 64-bit variant: two groups of four mix rounds (rotation rows 0-3, then 4-7). After each group a subkey
+	// is added: ks[] indexed mod 9 into every word, ts[] indexed mod 3 into p5/p6, the subkey number into p7.
+	const int s0 = R, s1 = R + 1;   // subkey numbers for the first and second injection
+	Round512v30(p0, p1, p2, p3, p4, p5, p6, p7, 0);
+	Round512v30(p2, p1, p4, p7, p6, p5, p0, p3, 1);
+	Round512v30(p4, p1, p6, p3, p0, p5, p2, p7, 2);
+	Round512v30(p6, p1, p0, p7, p2, p5, p4, p3, 3);
+	p0 += ks[(s0 + 0) % 9];   p1 += ks[(s0 + 1) % 9];
+	p2 += ks[(s0 + 2) % 9];   p3 += ks[(s0 + 3) % 9];
+	p4 += ks[(s0 + 4) % 9];
+	p5 += ks[(s0 + 5) % 9] + ts[(s0 + 0) % 3];
+	p6 += ks[(s0 + 6) % 9] + ts[(s0 + 1) % 3];
+	p7 += ks[(s0 + 7) % 9] + s0;
+
+	Round512v30(p0, p1, p2, p3, p4, p5, p6, p7, 4);
+	Round512v30(p2, p1, p4, p7, p6, p5, p0, p3, 5);
+	Round512v30(p4, p1, p6, p3, p0, p5, p2, p7, 6);
+	Round512v30(p6, p1, p0, p7, p2, p5, p4, p3, 7);
+	p0 += ks[(s1 + 0) % 9];   p1 += ks[(s1 + 1) % 9];
+	p2 += ks[(s1 + 2) % 9];   p3 += ks[(s1 + 3) % 9];
+	p4 += ks[(s1 + 4) % 9];
+	p5 += ks[(s1 + 5) % 9] + ts[(s1 + 0) % 3];
+	p6 += ks[(s1 + 6) % 9] + ts[(s1 + 1) % 3];
+	p7 += ks[(s1 + 7) % 9] + s1;
+}
+
+
+
+__global__ void __launch_bounds__(256,3) skein256_gpu_hash_32(int threads, uint32_t startNounce, uint64_t *outputHash)
+{
+	// One thread per work item. Word w (0..3) of item `thread` is the 64-bit value at
+	// outputHash[w * threads + thread]; the four words are read as input and then overwritten
+	// with the result. startNounce is not referenced by this kernel.
+	const int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	if (thread >= threads)
+		return;
+
+	uint2 h[9];   // h[0..7] key words, h[8] = skein_ks_parity ^ h[0] ^ ... ^ h[7]
+	uint2 t[3];   // tweak words
+	uint2 m0, m1, m2, m3;   // the four input words as {low, high} halves
+	uint2 p0, p1, p2, p3, p4, p5, p6, p7;
+
+	// First pass is keyed with the constant initial words and t12[0..2].
+	h[8] = skein_ks_parity;
+	for (int k = 0; k < 8; k++) {
+		h[k] = vSKEIN_IV512_256[k];
+		h[8] ^= h[k];
+	}
+	t[0] = t12[0];
+	t[1] = t12[1];
+	t[2] = t12[2];
+
+	LOHI(m0.x, m0.y, outputHash[thread]);
+	LOHI(m1.x, m1.y, outputHash[threads + thread]);
+	LOHI(m2.x, m2.y, outputHash[2 * threads + thread]);
+	LOHI(m3.x, m3.y, outputHash[3 * threads + thread]);
+
+	// Initial key injection: input words go into p0..p3, tweak words into p5 and p6.
+	p0 = h[0] + m0;
+	p1 = h[1] + m1;
+	p2 = h[2] + m2;
+	p3 = h[3] + m3;
+	p4 = h[4];
+	p5 = h[5] + t[0];
+	p6 = h[6] + t[1];
+	p7 = h[7];
+
+	#pragma unroll
+	for (int r = 1; r < 19; r += 2) { Round_8_512v35(h, t, p0, p1, p2, p3, p4, p5, p6, p7, r); }
+
+	// XOR the input words back into the first four state words.
+	p0 ^= m0;
+	p1 ^= m1;
+	p2 ^= m2;
+	p3 ^= m3;
+
+	// Second pass: the first-pass result becomes the new key, with t12[3..5] as tweak.
+	h[0] = p0; h[1] = p1; h[2] = p2; h[3] = p3;
+	h[4] = p4; h[5] = p5; h[6] = p6; h[7] = p7;
+	h[8] = skein_ks_parity;
+	#pragma unroll 8
+	for (int k = 0; k < 8; k++) { h[8] ^= h[k]; }
+
+	t[0] = t12[3];
+	t[1] = t12[4];
+	t[2] = t12[5];
+	p5 += t[0];   // p0..p7 already equal h[0..7], so only the tweak words need adding
+	p6 += t[1];
+	#pragma unroll
+	for (int r = 1; r < 19; r += 2) { Round_8_512v35(h, t, p0, p1, p2, p3, p4, p5, p6, p7, r); }
+
+	// Only the first four 64-bit words are written back.
+	outputHash[thread]               = devectorize(p0);
+	outputHash[threads + thread]     = devectorize(p1);
+	outputHash[2 * threads + thread] = devectorize(p2);
+	outputHash[3 * threads + thread] = devectorize(p3);
+}
+
+__global__ void __launch_bounds__(256, 3) skein256_gpu_hash_32_v30(int threads, uint32_t startNounce, uint64_t *outputHash)
+{
+	// 64-bit-arithmetic variant. One thread per work item; word w (0..3) of item `thread` is
+	// outputHash[w * threads + thread]. The four words are read as input and then overwritten
+	// with the result. startNounce is not referenced by this kernel.
+	const int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	if (thread >= threads)
+		return;
+
+	uint64_t h[9];   // h[0..7] key words, h[8] = skein_ks_parity64 ^ h[0] ^ ... ^ h[7]
+	uint64_t t[3];   // tweak words
+	uint64_t p0, p1, p2, p3, p4, p5, p6, p7;
+
+	// First pass is keyed with the constant initial words and t12[0..2] (converted to 64 bits).
+	h[8] = skein_ks_parity64;
+	for (int k = 0; k < 8; k++) {
+		h[k] = SKEIN_IV512_256[k];
+		h[8] ^= h[k];
+	}
+	t[0] = devectorize(t12[0]);
+	t[1] = devectorize(t12[1]);
+	t[2] = devectorize(t12[2]);
+
+	const uint64_t m0 = outputHash[thread];
+	const uint64_t m1 = outputHash[threads + thread];
+	const uint64_t m2 = outputHash[2 * threads + thread];
+	const uint64_t m3 = outputHash[3 * threads + thread];
+
+	// Initial key injection: input words go into p0..p3, tweak words into p5 and p6.
+	p0 = h[0] + m0;
+	p1 = h[1] + m1;
+	p2 = h[2] + m2;
+	p3 = h[3] + m3;
+	p4 = h[4];
+	p5 = h[5] + t[0];
+	p6 = h[6] + t[1];
+	p7 = h[7];
+
+	#pragma unroll
+	for (int r = 1; r < 19; r += 2) { Round_8_512v30(h, t, p0, p1, p2, p3, p4, p5, p6, p7, r); }
+
+	// XOR the input words back into the first four state words.
+	p0 ^= m0;
+	p1 ^= m1;
+	p2 ^= m2;
+	p3 ^= m3;
+
+	// Second pass: the first-pass result becomes the new key, with t12_30[3..5] as tweak.
+	h[0] = p0; h[1] = p1; h[2] = p2; h[3] = p3;
+	h[4] = p4; h[5] = p5; h[6] = p6; h[7] = p7;
+	h[8] = skein_ks_parity64;
+	#pragma unroll 8
+	for (int k = 0; k < 8; k++) { h[8] ^= h[k]; }
+
+	t[0] = t12_30[3];
+	t[1] = t12_30[4];
+	t[2] = t12_30[5];
+	p5 += t[0];   // p0..p7 already equal h[0..7], so only the tweak words need adding
+	p6 += t[1];
+
+	#pragma unroll
+	for (int r = 1; r < 19; r += 2) { Round_8_512v30(h, t, p0, p1, p2, p3, p4, p5, p6, p7, r); }
+
+	// Only the first four 64-bit words are written back.
+	outputHash[thread]               = p0;
+	outputHash[threads + thread]     = p1;
+	outputHash[2 * threads + thread] = p2;
+	outputHash[3 * threads + thread] = p3;
+}
+
+   
+void skein256_cpu_init(int thr_id, int threads)
+{
+// Intentionally empty: this function performs no work and ignores both parameters.
+} 
+
+
+__host__ void skein256_cpu_hash_32(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order)
+{
+	// Host wrapper: launches skein256_gpu_hash_32 (or skein256_gpu_hash_32_v30 when
+	// compute_version[thr_id] is below 35) with one thread per work item, then waits.
+	const int tpb = 256;
+	const size_t dyn_smem = 0;   // no dynamic shared memory is requested
+
+	// Round the block count up so a partial last block still covers the tail.
+	const dim3 launch_block(tpb);
+	const dim3 launch_grid((threads + tpb - 1) / tpb);
+
+	if (compute_version[thr_id] < 35)
+		skein256_gpu_hash_32_v30<<<launch_grid, launch_block, dyn_smem>>>(threads, startNounce, d_outputHash);
+	else
+		skein256_gpu_hash_32<<<launch_grid, launch_block, dyn_smem>>>(threads, startNounce, d_outputHash);
+
+	MyStreamSynchronize(NULL, order, thr_id);
+}
+
diff --git a/Algo256/keccak256.cu b/Algo256/keccak256.cu
new file mode 100644
index 0000000000..883d6a4218
--- /dev/null
+++ b/Algo256/keccak256.cu
@@ -0,0 +1,102 @@
+/*
+ * test routine for new algorithm
+ * 
+ */
+
+extern "C"
+{
+#include "sph/sph_shavite.h"
+#include "sph/sph_simd.h"
+#include "sph/sph_keccak.h"
+
+#include "miner.h"
+}
+
+// from cpu-miner.c
+extern int device_map[8];
+
+// Memory for the input/output of the chained hash functions
+static uint32_t *d_hash[8];
+
+extern void keccak256_cpu_init(int thr_id, int threads);
+extern void keccak256_setBlock_80(void *pdata,const void *ptarget);
+extern uint32_t keccak256_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
+
+// CPU reference hash (Keccak-256 via sph) used to validate GPU results
+inline void cpu_hash(void *state, const void *input)
+{
+	// Runs sph Keccak-256 over the first 80 bytes of `input` and copies the first
+	// 32 bytes of the local digest buffer to `state`.
+	uint32_t digest[16];
+	sph_keccak_context keccak;
+
+	sph_keccak256_init(&keccak);
+	sph_keccak256(&keccak, input, 80);
+	sph_keccak256_close(&keccak, (void*) digest);
+
+	memcpy(state, digest, 32);
+}
+
+
+extern bool opt_benchmark;
+
+extern "C" int scanhash_keccak256(int thr_id, uint32_t *pdata,
+    const uint32_t *ptarget, uint32_t max_nonce,
+    unsigned long *hashes_done)
+{
+	// Scans nonces starting at pdata[19] in GPU batches of `throughput`. A candidate reported by the
+	// GPU is re-hashed on the CPU; if it validates, pdata[19] is set to it and 1 is returned.
+	// Returns 0 once max_nonce is reached or work_restart[thr_id].restart is set.
+	const uint32_t first_nonce = pdata[19];
+
+	if (opt_benchmark)
+		((uint32_t*)ptarget)[7] = 0x0000ff;
+
+	const uint32_t Htarg = ptarget[7];
+	const int throughput = 256*256*8*8;
+
+	static bool init[8] = {0,0,0,0,0,0,0,0};
+	if (!init[thr_id])
+	{
+		cudaSetDevice(device_map[thr_id]);
+		// Copy constants, allocate memory (done once per thr_id).
+		cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
+		keccak256_cpu_init(thr_id, throughput);
+		init[thr_id] = true;
+	}
+
+	// The 20 header words are passed to the GPU side after encoding each with be32enc.
+	uint32_t endiandata[20];
+	for (int k=0; k < 20; k++) {
+		be32enc(&endiandata[k], ((uint32_t*)pdata)[k]);
+	}
+	keccak256_setBlock_80((void*)endiandata, ptarget);
+	do {
+		int order = 0;
+
+		uint32_t foundNonce = keccak256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
+		if  (foundNonce != 0xffffffff)
+		{
+			uint32_t vhash64[8];
+			be32enc(&endiandata[19], foundNonce);
+			cpu_hash(vhash64, endiandata);
+
+			if( (vhash64[7]<=Htarg) && fulltest(vhash64, ptarget) ) {
+				pdata[19] = foundNonce;
+				*hashes_done = foundNonce - first_nonce + 1;
+				return 1;
+			} else {
+				applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU! vhash64 %08x and htarg %08x", thr_id, foundNonce,vhash64[7],Htarg);
+			}
+		}
+
+		// Advance in 64 bits: a plain 32-bit "+=" wraps past 0xffffffff, which would make the loop
+		// condition true again, rescan low nonces and report a bogus hashes_done.
+		const uint64_t next_nonce = (uint64_t)pdata[19] + (uint64_t)throughput;
+		if (next_nonce > 0xffffffffULL) { pdata[19] = max_nonce; break; }
+		pdata[19] = (uint32_t)next_nonce;
+	} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
+
+	*hashes_done = pdata[19] - first_nonce + 1;
+	return 0;
+}
diff --git a/Debug/ccminer.vcxprojResolveAssemblyReference.cache b/Debug/ccminer.vcxprojResolveAssemblyReference.cache
new file mode 100644
index 0000000000..c6f2429bec
Binary files /dev/null and b/Debug/ccminer.vcxprojResolveAssemblyReference.cache differ
diff --git a/Makefile.am b/Makefile.am
index 831cfd23c5..bf33157c00 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -18,6 +18,8 @@ ccminer_SOURCES		= elist.h miner.h compat.h \
 			  compat/sys/time.h compat/getopt/getopt.h \
 			  cpu-miner.c util.c sph/bmw.c sph/blake.c sph/groestl.c sph/jh.c sph/keccak.c sph/skein.c hefty1.c scrypt.c sha2.c \
 			  sph/bmw.h sph/sph_blake.h sph/sph_groestl.h sph/sph_jh.h sph/sph_keccak.h sph/sph_skein.h sph/sph_types.h \
+			  sph/sph_sha2.c sph/tiger.c sph/ripemd.c sph/sph_sha2.h sph/sph_tiger.h sph/sph_ripemd.h \
+			  sph/neoscrypt.c sph/neoscrypt.h sph/Lyra2.c sph/Sponge.c sph/Lyra2.h sph/Sponge.h \
 			  heavy/heavy.cu \
 			  heavy/cuda_blake512.cu heavy/cuda_blake512.h \
 			  heavy/cuda_combine.cu heavy/cuda_combine.h \
@@ -36,25 +38,48 @@ ccminer_SOURCES		= elist.h miner.h compat.h \
 			  cuda_nist5.cu \
 			  sph/cubehash.c sph/echo.c sph/luffa.c sph/shavite.c sph/simd.c \
 			  sph/hamsi.c sph/hamsi_helper.c sph/sph_hamsi.h \
-			  x13/x13.cu x13/cuda_x13_hamsi512.cu x13/cuda_x13_fugue512.cu \
+			  sph/shabal.c sph/whirlpool.c sph/sph_shabal.h sph/sph_whirlpool.h \
+			  sph/haval.c sph/sph_haval.h sph/sph_sha2.h sph/sha2big.c \
+			  qubit/qubit.cu qubit/qubit_luffa512.cu x13/x14.cu x13/fresh.cu x13/x17.cu \
+			  x13/x13.cu x13/cuda_x13_hamsi512.cu x13/cuda_x13_fugue512.cu x13/x15.cu x13/cuda_shabal512.cu x13/cuda_whirlpool512.cu \
+			  x13/cuda_haval512.cu x13/cuda_sha512.cu qubit/doom.cu x13/goalcoin.cu \
+			  x13/whirlpool.cu \
+			  Algo256/cuda_keccak256.cu Algo256/keccak256.cu Algo256/cuda_blake256.cu  \
+			  Algo256/cuda_groestl256.cu Algo256/cuda_skein256.cu \
+			  lyra2/cuda_lyra2.cu lyra2/lyra2RE.cu \
+			  pluck/cuda_pluck.cu pluck/pluck.cu \
 			  x11/x11.cu x11/cuda_x11_luffa512.cu x11/cuda_x11_cubehash512.cu \
-			  x11/cuda_x11_shavite512.cu x11/cuda_x11_simd512.cu x11/cuda_x11_echo.cu
+			  x11/cuda_x11_shavite512.cu x11/cuda_x11_simd512.cu x11/cuda_x11_echo.cu \
+			  x13/cuda_m7_sha256.cu x13/cuda_mul.cu x13/cuda_mul2.cu x13/cuda_ripemd160.cu x13/cuda_tiger192.cu \
+			  x13/m7_keccak512.cu x13/m7.cu qubit/deep.cu
 
 ccminer_LDFLAGS		= $(PTHREAD_FLAGS) @CUDA_LDFLAGS@
-ccminer_LDADD		= @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ @CUDA_LIBS@ @OPENMP_CFLAGS@ @LIBS@
+ccminer_LDADD		= @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ @CUDA_LIBS@ @OPENMP_CFLAGS@ @LIBS@ -lmpir
 ccminer_CPPFLAGS	= -msse2 @LIBCURL_CPPFLAGS@ @OPENMP_CFLAGS@ $(PTHREAD_FLAGS) -fno-strict-aliasing $(JANSSON_INCLUDES) -DSCRYPT_KECCAK512 -DSCRYPT_CHACHA -DSCRYPT_CHOOSE_COMPILETIME
 
 # we're now targeting all major compute architectures within one binary.
+# require cuda 6.5 or higher
 .cu.o:
-	$(NVCC) @CFLAGS@ -I . -Xptxas "-abi=no -v" -gencode=arch=compute_30,code=\"sm_30,compute_30\" -gencode=arch=compute_35,code=\"sm_35,compute_35\" --maxrregcount=80 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $<
+	$(NVCC) @CFLAGS@ -I . -Xptxas "-abi=no -v"  -gencode=arch=compute_35,code=\"sm_35,compute_35\" -gencode=arch=compute_50,code=\"sm_50,compute_50\" -gencode=arch=compute_52,code=\"sm_52,compute_52\" --maxrregcount=80 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $<
 
 # Shavite compiles faster with 128 regs
 x11/cuda_x11_shavite512.o: x11/cuda_x11_shavite512.cu
-	$(NVCC) -I . -I cudpp-2.1/include @CFLAGS@ -Xptxas "-abi=no -v" -gencode=arch=compute_30,code=\"sm_30,compute_30\" -gencode=arch=compute_35,code=\"sm_35,compute_35\" --maxrregcount=128 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $<        
+	$(NVCC) -I . -I cudpp-2.1/include @CFLAGS@ -Xptxas "-abi=no -v"  -gencode=arch=compute_35,code=\"sm_35,compute_35\" -gencode=arch=compute_50,code=\"sm_50,compute_50\"  -gencode=arch=compute_52,code=\"sm_52,compute_52\"  --maxrregcount=128 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $<        
+
+# whirlpool compiles faster with 64 regs
+x13/cuda_whirlpool512.o: x13/cuda_whirlpool512.cu
+	$(NVCC) -I . -I cudpp-2.1/include @CFLAGS@ -Xptxas "-abi=no -v"  -gencode=arch=compute_35,code=\"sm_35,compute_35\" -gencode=arch=compute_50,code=\"sm_50,compute_50\"  -gencode=arch=compute_52,code=\"sm_52,compute_52\"  --maxrregcount=64 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $<        
+
+# tiger192 is likewise built with 64 regs
+x13/cuda_tiger192.o: x13/cuda_tiger192.cu
+	$(NVCC) -I . -I cudpp-2.1/include @CFLAGS@ -Xptxas "-abi=no -v"  -gencode=arch=compute_35,code=\"sm_35,compute_35\" -gencode=arch=compute_50,code=\"sm_50,compute_50\"  -gencode=arch=compute_52,code=\"sm_52,compute_52\"  --maxrregcount=64 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $<        
 
 # ABI requiring code modules
 quark/cuda_quark_compactionTest.o: quark/cuda_quark_compactionTest.cu
-	$(NVCC) -I . -I cudpp-2.1/include @CFLAGS@ -Xptxas "-abi=yes -v" -gencode=arch=compute_30,code=\"sm_30,compute_30\" -gencode=arch=compute_35,code=\"sm_35,compute_35\" --maxrregcount=80 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $<
+	$(NVCC) -I . -I cudpp-2.1/include @CFLAGS@ -Xptxas "-abi=yes -v"  -gencode=arch=compute_35,code=\"sm_35,compute_35\" -gencode=arch=compute_50,code=\"sm_50,compute_50\"  -gencode=arch=compute_52,code=\"sm_52,compute_52\"  --maxrregcount=80 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $<
 
 JHA/cuda_jha_compactionTest.o: JHA/cuda_jha_compactionTest.cu
-	$(NVCC) -I . -I cudpp-2.1/include @CFLAGS@ -Xptxas "-abi=yes -v" -gencode=arch=compute_30,code=\"sm_30,compute_30\" -gencode=arch=compute_35,code=\"sm_35,compute_35\" --maxrregcount=80 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $<
+	$(NVCC) -I . -I cudpp-2.1/include @CFLAGS@ -Xptxas "-abi=yes -v"  -gencode=arch=compute_35,code=\"sm_35,compute_35\" -gencode=arch=compute_50,code=\"sm_50,compute_50\"  -gencode=arch=compute_52,code=\"sm_52,compute_52\"  --maxrregcount=80 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $<
+
+x13/cuda_mul.o: x13/cuda_mul.cu
+	$(NVCC) -I . -I cudpp-2.1/include @CFLAGS@ -Xptxas "-abi=yes -v"  -gencode=arch=compute_35,code=\"sm_35,compute_35\" -gencode=arch=compute_50,code=\"sm_50,compute_50\"  -gencode=arch=compute_52,code=\"sm_52,compute_52\"  --maxrregcount=80 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $<
diff --git a/ccminer.vcxproj b/ccminer.vcxproj
index 93621c4e84..4d0cb5ef3c 100644
--- a/ccminer.vcxproj
+++ b/ccminer.vcxproj
@@ -1,5 +1,5 @@
 ﻿<?xml version="1.0" encoding="utf-8"?>
-<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+<Project DefaultTargets="Build" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
   <ItemGroup Label="ProjectConfigurations">
     <ProjectConfiguration Include="Debug|Win32">
       <Configuration>Debug</Configuration>
@@ -27,27 +27,31 @@
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
     <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v120</PlatformToolset>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
     <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v120</PlatformToolset>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v120</PlatformToolset>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v120</PlatformToolset>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 5.5.props" />
+    <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 6.5.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
@@ -79,13 +83,14 @@
       <WarningLevel>Level3</WarningLevel>
       <Optimization>Disabled</Optimization>
       <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;CURL_STATICLIB;SCRYPT_KECCAK512;SCRYPT_CHACHA;SCRYPT_CHOOSE_COMPILETIME;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <AdditionalIncludeDirectories>.;compat;compat\jansson;compat\getopt;..\pthreads\Pre-built.2\include;..\curl-7.29.0\include;..\OpenSSL-Win32\include;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>..\mpir-2.6.0\build.vc10\Win32\Release;.;compat;compat\jansson;compat\getopt;..\pthreads\Pre-built.2\include;..\curl-7.29.0\include;..\OpenSSL-Win32\include;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir)</AdditionalIncludeDirectories>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
     </ClCompile>
     <Link>
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <SubSystem>Console</SubSystem>
       <AdditionalDependencies>cudart.lib;cuda.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;pthreadVC2.lib;libcurl.lib;ws2_32.lib;Wldap32.lib;libeay32MTd.lib;ssleay32MTd.lib;%(AdditionalDependencies)</AdditionalDependencies>
-      <AdditionalLibraryDirectories>..\pthreads\Pre-built.2\lib\x86;..\curl-7.29.0\build\lib\Debug;..\OpenSSL-Win32\lib\VC\static;%(AdditionalLibraryDirectories);$(CudaToolkitLibDir)</AdditionalLibraryDirectories>
+      <AdditionalLibraryDirectories>..\mpir-2.6.0\build.vc10\Win32\Release;..\pthreads\Pre-built.2\lib\x86;..\curl-7.29.0\build\lib\Debug;..\OpenSSL-Win32\lib\VC\static;%(AdditionalLibraryDirectories);$(CudaToolkitLibDir)</AdditionalLibraryDirectories>
     </Link>
     <PostBuildEvent>
       <Command>echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
@@ -100,7 +105,7 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
     <CudaCompile>
       <PtxAsOptionV>true</PtxAsOptionV>
       <Keep>true</Keep>
-      <CodeGeneration>compute_35,sm_35</CodeGeneration>
+      <CodeGeneration>compute_30,sm_30;compute_35,sm_35;compute_50,sm_50</CodeGeneration>
       <Include>
       </Include>
     </CudaCompile>
@@ -138,27 +143,28 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
   </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <ClCompile>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
       <WarningLevel>Level3</WarningLevel>
       <Optimization>MaxSpeed</Optimization>
       <FunctionLevelLinking>true</FunctionLevelLinking>
       <IntrinsicFunctions>true</IntrinsicFunctions>
       <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;CURL_STATICLIB;SCRYPT_KECCAK512;SCRYPT_CHACHA;SCRYPT_CHOOSE_COMPILETIME;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <AdditionalIncludeDirectories>.;compat;compat\jansson;compat\getopt;..\pthreads\Pre-built.2\include;..\curl-7.29.0\include;..\OpenSSL-Win32\include;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>..\mpir-2.6.0\build.vc10\Win32\Release;.;compat;compat\jansson;compat\getopt;..\pthreads\Pre-built.2\include;..\curl-7.29.0\include;..\OpenSSL-Win32\include;%(AdditionalIncludeDirectories);C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5</AdditionalIncludeDirectories>
     </ClCompile>
     <Link>
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <EnableCOMDATFolding>true</EnableCOMDATFolding>
       <OptimizeReferences>true</OptimizeReferences>
       <SubSystem>Console</SubSystem>
-      <AdditionalDependencies>cudart.lib;cuda.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;pthreadVC2.lib;libcurl.lib;ws2_32.lib;Wldap32.lib;libeay32MT.lib;ssleay32MT.lib;%(AdditionalDependencies)</AdditionalDependencies>
-      <AdditionalLibraryDirectories>..\pthreads\Pre-built.2\lib\x86;..\curl-7.29.0\build\lib\Release;..\OpenSSL-Win32\lib\VC\static;%(AdditionalLibraryDirectories);$(CudaToolkitLibDir)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>mpir.lib;mpirxx.lib;cudart.lib;cuda.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;pthreadVC2.lib;libcurl.lib;ws2_32.lib;Wldap32.lib;libeay32MT.lib;ssleay32MT.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalLibraryDirectories>..\mpir-2.6.0\build.vc10\Win32\Release;..\pthreads\Pre-built.2\lib\x86;..\curl-7.29.0\build\lib\Release;..\OpenSSL-Win32\lib\VC\static;%(AdditionalLibraryDirectories);$(CudaToolkitLibDir)</AdditionalLibraryDirectories>
     </Link>
     <PostBuildEvent>
       <Command>echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
 copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
     </PostBuildEvent>
     <CudaCompile>
-      <CInterleavedPTX>true</CInterleavedPTX>
+      <CInterleavedPTX>false</CInterleavedPTX>
     </CudaCompile>
     <CudaCompile>
       <MaxRegCount>80</MaxRegCount>
@@ -166,13 +172,16 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
     <CudaCompile>
       <PtxAsOptionV>true</PtxAsOptionV>
       <Keep>true</Keep>
-      <CodeGeneration>compute_35,sm_35</CodeGeneration>
+      <CodeGeneration>compute_52,sm_52;compute_50,sm_50;compute_35,sm_35</CodeGeneration>
       <Include>
       </Include>
+      <GenerateLineInfo>false</GenerateLineInfo>
+      <KeepDir>.\Release\PrepFile</KeepDir>
     </CudaCompile>
   </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <ClCompile>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
       <WarningLevel>Level3</WarningLevel>
       <Optimization>MaxSpeed</Optimization>
       <FunctionLevelLinking>true</FunctionLevelLinking>
@@ -204,6 +213,7 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
       <CodeGeneration>compute_35,sm_35</CodeGeneration>
       <Include>
       </Include>
+      <Optimization>O3</Optimization>
     </CudaCompile>
   </ItemDefinitionGroup>
   <ItemGroup>
@@ -239,14 +249,24 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
     <ClCompile Include="sph\echo.c" />
     <ClCompile Include="sph\fugue.c" />
     <ClCompile Include="sph\groestl.c" />
+    <ClCompile Include="sph\haval.c" />
     <ClCompile Include="sph\jh.c" />
     <ClCompile Include="sph\keccak.c" />
     <ClCompile Include="sph\luffa.c" />
+    <ClCompile Include="sph\Lyra2.c" />
+    <ClCompile Include="sph\neoscrypt.c" />
+    <ClCompile Include="sph\ripemd.c" />
+    <ClCompile Include="sph\sph_sha2.c" />
+    <ClCompile Include="sph\sha2big.c" />
+    <ClCompile Include="sph\shabal.c" />
     <ClCompile Include="sph\shavite.c" />
     <ClCompile Include="sph\simd.c" />
     <ClCompile Include="sph\skein.c" />
     <ClCompile Include="sph\hamsi.c" />
     <ClCompile Include="sph\hamsi_helper.c" />
+    <ClCompile Include="sph\Sponge.c" />
+    <ClCompile Include="sph\tiger.c" />
+    <ClCompile Include="sph\whirlpool.c" />
     <ClCompile Include="util.c">
       <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">/TP %(AdditionalOptions)</AdditionalOptions>
       <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">/TP %(AdditionalOptions)</AdditionalOptions>
@@ -264,6 +284,7 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
     <ClInclude Include="cpuminer-config.h" />
     <ClInclude Include="cuda_groestlcoin.h" />
     <ClInclude Include="cuda_helper.h" />
+    <ClInclude Include="cuda_vector.h" />
     <ClInclude Include="elist.h" />
     <ClInclude Include="heavy\cuda_blake512.h" />
     <ClInclude Include="heavy\cuda_combine.h" />
@@ -273,22 +294,43 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
     <ClInclude Include="heavy\cuda_sha256.h" />
     <ClInclude Include="hefty1.h" />
     <ClInclude Include="miner.h" />
+    <ClInclude Include="resource.h" />
+    <ClInclude Include="sph\Lyra2.h" />
+    <ClInclude Include="sph\neoscrypt.h" />
     <ClInclude Include="sph\sph_blake.h" />
     <ClInclude Include="sph\sph_bmw.h" />
     <ClInclude Include="sph\sph_cubehash.h" />
     <ClInclude Include="sph\sph_echo.h" />
     <ClInclude Include="sph\sph_groestl.h" />
+    <ClInclude Include="sph\sph_haval.h" />
     <ClInclude Include="sph\sph_jh.h" />
     <ClInclude Include="sph\sph_keccak.h" />
     <ClInclude Include="sph\sph_luffa.h" />
+    <ClInclude Include="sph\sph_ripemd.h" />
+    <ClInclude Include="sph\sph_sha2.h" />
     <ClInclude Include="sph\sph_shavite.h" />
     <ClInclude Include="sph\sph_simd.h" />
     <ClInclude Include="sph\sph_skein.h" />
     <ClInclude Include="sph\sph_hamsi.h" />
+    <ClInclude Include="sph\sph_tiger.h" />
     <ClInclude Include="sph\sph_types.h" />
+    <ClInclude Include="sph\sph_whirlpool.h" />
+    <ClInclude Include="sph\Sponge.h" />
     <ClInclude Include="uint256.h" />
   </ItemGroup>
   <ItemGroup>
+    <CudaCompile Include="Algo256\cuda_blake256.cu" />
+    <CudaCompile Include="Algo256\cuda_groestl256.cu" />
+    <CudaCompile Include="Algo256\cuda_keccak256.cu">
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <MaxRegCount Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">80</MaxRegCount>
+      <MaxRegCount Condition="'$(Configuration)|$(Platform)'=='Release|x64'">80</MaxRegCount>
+    </CudaCompile>
+    <CudaCompile Include="Algo256\cuda_skein256.cu" />
+    <CudaCompile Include="Algo256\keccak256.cu" />
     <CudaCompile Include="cuda_fugue256.cu">
       <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
       <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
@@ -338,7 +380,7 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
       <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
     </CudaCompile>
     <CudaCompile Include="heavy\cuda_keccak512.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "" %(AdditionalOptions)</AdditionalOptions>
       <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
       <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
       <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
@@ -362,7 +404,7 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
       <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=yes -v" %(AdditionalOptions)</AdditionalOptions>
     </CudaCompile>
     <CudaCompile Include="JHA\cuda_jha_keccak512.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "" %(AdditionalOptions)</AdditionalOptions>
       <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
       <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
       <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
@@ -373,8 +415,12 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
       <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
       <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
     </CudaCompile>
+    <CudaCompile Include="lyra2\cuda_lyra2.cu" />
+    <CudaCompile Include="lyra2\lyra2RE.cu" />
+    <CudaCompile Include="pluck\cuda_pluck.cu" />
+    <CudaCompile Include="pluck\pluck.cu" />
     <CudaCompile Include="quark\animecoin.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v -dlcm=cg" %(AdditionalOptions)</AdditionalOptions>
       <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
       <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
       <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
@@ -398,7 +444,7 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
       <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
     </CudaCompile>
     <CudaCompile Include="quark\cuda_quark_checkhash.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "" %(AdditionalOptions)</AdditionalOptions>
       <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
       <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
       <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
@@ -416,7 +462,7 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
       <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
     </CudaCompile>
     <CudaCompile Include="quark\cuda_quark_keccak512.cu">
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "" %(AdditionalOptions)</AdditionalOptions>
       <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
       <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
       <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
@@ -433,6 +479,20 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
       <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
       <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
     </CudaCompile>
+    <CudaCompile Include="qubit\deep.cu" />
+    <CudaCompile Include="qubit\doom.cu" />
+    <CudaCompile Include="qubit\qubit.cu">
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+    </CudaCompile>
+    <CudaCompile Include="qubit\qubit_luffa512.cu">
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+    </CudaCompile>
     <CudaCompile Include="x11\cuda_x11_aes.cu">
       <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
       <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
@@ -463,7 +523,7 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
     </CudaCompile>
     <CudaCompile Include="x11\cuda_x11_shavite512.cu">
       <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
-      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "" %(AdditionalOptions)</AdditionalOptions>
       <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
       <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
       <MaxRegCount Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">128</MaxRegCount>
@@ -491,11 +551,73 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
       <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
       <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
     </CudaCompile>
+    <CudaCompile Include="x13\cuda_haval512.cu">
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+    </CudaCompile>
+    <CudaCompile Include="x13\cuda_m7_sha256.cu">
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+    </CudaCompile>
+    <CudaCompile Include="x13\cuda_mul.cu">
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+    </CudaCompile>
+    <CudaCompile Include="x13\cuda_mul2.cu">
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+    </CudaCompile>
+    <CudaCompile Include="x13\cuda_ripemd160.cu">
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+    </CudaCompile>
+    <CudaCompile Include="x13\cuda_sha512.cu">
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v -dlcm=cg" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-v -dlcm=cg" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <MaxRegCount Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">74</MaxRegCount>
+      <MaxRegCount Condition="'$(Configuration)|$(Platform)'=='Release|x64'">74</MaxRegCount>
+    </CudaCompile>
+    <CudaCompile Include="x13\cuda_shabal512.cu">
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+    </CudaCompile>
+    <CudaCompile Include="x13\cuda_tiger192.cu">
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <MaxRegCount Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">64</MaxRegCount>
+      <MaxRegCount Condition="'$(Configuration)|$(Platform)'=='Release|x64'">64</MaxRegCount>
+    </CudaCompile>
+    <CudaCompile Include="x13\cuda_whirlpool512.cu">
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v -dlcm=cg" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas " -dlcm=cg" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <MaxRegCount Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">64</MaxRegCount>
+      <MaxRegCount Condition="'$(Configuration)|$(Platform)'=='Release|x64'">64</MaxRegCount>
+    </CudaCompile>
     <CudaCompile Include="x13\cuda_x13_hamsi512.cu">
       <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
       <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
       <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
       <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <MaxRegCount Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">64</MaxRegCount>
+      <MaxRegCount Condition="'$(Configuration)|$(Platform)'=='Release|x64'">64</MaxRegCount>
     </CudaCompile>
     <CudaCompile Include="x13\cuda_x13_fugue512.cu">
       <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
@@ -503,15 +625,55 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
       <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
       <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
     </CudaCompile>
+    <CudaCompile Include="x13\fresh.cu">
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+    </CudaCompile>
+    <CudaCompile Include="x13\goalcoin.cu" />
+    <CudaCompile Include="x13\m7.cu" />
+    <CudaCompile Include="x13\m7_keccak512.cu">
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+    </CudaCompile>
+    <CudaCompile Include="x13\whirlpool.cu" />
     <CudaCompile Include="x13\x13.cu">
       <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
       <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
       <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
       <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
     </CudaCompile>
+    <CudaCompile Include="x13\x14.cu">
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+    </CudaCompile>
+    <CudaCompile Include="x13\x15.cu">
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+    </CudaCompile>
+    <CudaCompile Include="x13\x17.cu">
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
+    </CudaCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <Reference Include="System" />
+    <Reference Include="System.Data" />
+    <Reference Include="System.Drawing" />
+    <Reference Include="System.Windows.Forms" />
+    <Reference Include="System.Xml" />
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 5.5.targets" />
+    <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 6.5.targets" />
   </ImportGroup>
 </Project>
\ No newline at end of file
diff --git a/ccminer.vcxproj.filters b/ccminer.vcxproj.filters
index 2fb6824895..941ea883b8 100644
--- a/ccminer.vcxproj.filters
+++ b/ccminer.vcxproj.filters
@@ -58,6 +58,18 @@
     <Filter Include="Source Files\CUDA\x13">
       <UniqueIdentifier>{d67a2af7-4851-4d21-910e-87791bc8ee35}</UniqueIdentifier>
     </Filter>
+    <Filter Include="Source Files\CUDA\qubit">
+      <UniqueIdentifier>{f3ed23a2-8ce7-41a5-b051-6da56047dc35}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Source Files\CUDA\Algo256">
+      <UniqueIdentifier>{a448ec8d-f346-4944-9786-aa0b55519306}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Source Files\CUDA\lyra2RE">
+      <UniqueIdentifier>{e23c7c23-ddfd-4da2-a51d-4fbeab96c66c}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Source Files\CUDA\pluck">
+      <UniqueIdentifier>{59eb6b57-944a-425e-920a-bb168e950c45}</UniqueIdentifier>
+    </Filter>
   </ItemGroup>
   <ItemGroup>
     <ClCompile Include="compat\jansson\dump.c">
@@ -153,6 +165,36 @@
     <ClCompile Include="sph\hamsi_helper.c">
       <Filter>Source Files\sph</Filter>
     </ClCompile>
+    <ClCompile Include="sph\whirlpool.c">
+      <Filter>Source Files\sph</Filter>
+    </ClCompile>
+    <ClCompile Include="sph\shabal.c">
+      <Filter>Source Files\sph</Filter>
+    </ClCompile>
+    <ClCompile Include="sph\sha2big.c">
+      <Filter>Source Files\sph</Filter>
+    </ClCompile>
+    <ClCompile Include="sph\haval.c">
+      <Filter>Source Files\sph</Filter>
+    </ClCompile>
+    <ClCompile Include="sph\ripemd.c">
+      <Filter>Source Files\sph</Filter>
+    </ClCompile>
+    <ClCompile Include="sph\tiger.c">
+      <Filter>Source Files\sph</Filter>
+    </ClCompile>
+    <ClCompile Include="sph\sph_sha2.c">
+      <Filter>Source Files\sph</Filter>
+    </ClCompile>
+    <ClCompile Include="sph\Lyra2.c">
+      <Filter>Source Files\sph</Filter>
+    </ClCompile>
+    <ClCompile Include="sph\neoscrypt.c">
+      <Filter>Source Files\sph</Filter>
+    </ClCompile>
+    <ClCompile Include="sph\Sponge.c">
+      <Filter>Source Files\sph</Filter>
+    </ClCompile>
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="compat.h">
@@ -203,9 +245,6 @@
     <ClInclude Include="sph\sph_echo.h">
       <Filter>Header Files\sph</Filter>
     </ClInclude>
-    <ClInclude Include="sph\sph_fugue.h">
-      <Filter>Header Files\sph</Filter>
-    </ClInclude>
     <ClInclude Include="sph\sph_groestl.h">
       <Filter>Header Files\sph</Filter>
     </ClInclude>
@@ -254,14 +293,36 @@
     <ClInclude Include="sph\sph_hamsi.h">
       <Filter>Header Files\sph</Filter>
     </ClInclude>
+    <ClInclude Include="sph\sph_whirlpool.h">
+      <Filter>Header Files\sph</Filter>
+    </ClInclude>
+    <ClInclude Include="resource.h" />
+    <ClInclude Include="sph\sph_haval.h">
+      <Filter>Header Files\sph</Filter>
+    </ClInclude>
+    <ClInclude Include="sph\sph_sha2.h">
+      <Filter>Header Files\sph</Filter>
+    </ClInclude>
+    <ClInclude Include="sph\sph_ripemd.h">
+      <Filter>Header Files\sph</Filter>
+    </ClInclude>
+    <ClInclude Include="sph\sph_tiger.h">
+      <Filter>Header Files\sph</Filter>
+    </ClInclude>
+    <ClInclude Include="sph\Lyra2.h">
+      <Filter>Header Files\sph</Filter>
+    </ClInclude>
+    <ClInclude Include="sph\neoscrypt.h">
+      <Filter>Header Files\sph</Filter>
+    </ClInclude>
+    <ClInclude Include="sph\Sponge.h">
+      <Filter>Header Files\sph</Filter>
+    </ClInclude>
+    <ClInclude Include="cuda_vector.h">
+      <Filter>Header Files\CUDA</Filter>
+    </ClInclude>
   </ItemGroup>
   <ItemGroup>
-    <CudaCompile Include="cuda_fugue256.cu">
-      <Filter>Source Files\CUDA</Filter>
-    </CudaCompile>
-    <CudaCompile Include="cuda_groestlcoin.cu">
-      <Filter>Source Files\CUDA</Filter>
-    </CudaCompile>
     <CudaCompile Include="JHA\cuda_jha_keccak512.cu">
       <Filter>Source Files\CUDA\JHA</Filter>
     </CudaCompile>
@@ -271,9 +332,6 @@
     <CudaCompile Include="quark\cuda_quark_checkhash.cu">
       <Filter>Source Files\CUDA\quark</Filter>
     </CudaCompile>
-    <CudaCompile Include="cuda_myriadgroestl.cu">
-      <Filter>Source Files\CUDA</Filter>
-    </CudaCompile>
     <CudaCompile Include="JHA\cuda_jha_compactionTest.cu">
       <Filter>Source Files\CUDA\JHA</Filter>
     </CudaCompile>
@@ -361,5 +419,104 @@
     <CudaCompile Include="x13\x13.cu">
       <Filter>Source Files\CUDA\x13</Filter>
     </CudaCompile>
+    <CudaCompile Include="x13\cuda_shabal512.cu">
+      <Filter>Source Files\CUDA\x13</Filter>
+    </CudaCompile>
+    <CudaCompile Include="x13\cuda_whirlpool512.cu">
+      <Filter>Source Files\CUDA\x13</Filter>
+    </CudaCompile>
+    <CudaCompile Include="x13\x15.cu">
+      <Filter>Source Files\CUDA\x13</Filter>
+    </CudaCompile>
+    <CudaCompile Include="x13\x14.cu">
+      <Filter>Source Files\CUDA\x13</Filter>
+    </CudaCompile>
+    <CudaCompile Include="qubit\qubit.cu">
+      <Filter>Source Files\CUDA\qubit</Filter>
+    </CudaCompile>
+    <CudaCompile Include="qubit\qubit_luffa512.cu">
+      <Filter>Source Files\CUDA\qubit</Filter>
+    </CudaCompile>
+    <CudaCompile Include="x13\x17.cu">
+      <Filter>Source Files\CUDA\x13</Filter>
+    </CudaCompile>
+    <CudaCompile Include="x13\fresh.cu">
+      <Filter>Source Files\CUDA\x13</Filter>
+    </CudaCompile>
+    <CudaCompile Include="x13\cuda_haval512.cu">
+      <Filter>Source Files\CUDA\x13</Filter>
+    </CudaCompile>
+    <CudaCompile Include="x13\cuda_sha512.cu">
+      <Filter>Source Files\CUDA\x13</Filter>
+    </CudaCompile>
+    <CudaCompile Include="Algo256\keccak256.cu">
+      <Filter>Source Files\CUDA\Algo256</Filter>
+    </CudaCompile>
+    <CudaCompile Include="cuda_fugue256.cu">
+      <Filter>Source Files\CUDA\Algo256</Filter>
+    </CudaCompile>
+    <CudaCompile Include="cuda_groestlcoin.cu">
+      <Filter>Source Files\CUDA\Algo256</Filter>
+    </CudaCompile>
+    <CudaCompile Include="cuda_myriadgroestl.cu">
+      <Filter>Source Files\CUDA\Algo256</Filter>
+    </CudaCompile>
+    <CudaCompile Include="Algo256\cuda_keccak256.cu">
+      <Filter>Source Files\CUDA\Algo256</Filter>
+    </CudaCompile>
+    <CudaCompile Include="x13\whirlpool.cu">
+      <Filter>Source Files\CUDA\x13</Filter>
+    </CudaCompile>
+    <CudaCompile Include="x13\m7.cu">
+      <Filter>Source Files\CUDA\x13</Filter>
+    </CudaCompile>
+    <CudaCompile Include="x13\cuda_m7_sha256.cu">
+      <Filter>Source Files\CUDA\x13</Filter>
+    </CudaCompile>
+    <CudaCompile Include="x13\cuda_ripemd160.cu">
+      <Filter>Source Files\CUDA\x13</Filter>
+    </CudaCompile>
+    <CudaCompile Include="x13\cuda_tiger192.cu">
+      <Filter>Source Files\CUDA\x13</Filter>
+    </CudaCompile>
+    <CudaCompile Include="x13\m7_keccak512.cu">
+      <Filter>Source Files\CUDA\x13</Filter>
+    </CudaCompile>
+    <CudaCompile Include="x13\goalcoin.cu">
+      <Filter>Source Files\CUDA\x13</Filter>
+    </CudaCompile>
+    <CudaCompile Include="x13\cuda_mul.cu">
+      <Filter>Source Files\CUDA\x13</Filter>
+    </CudaCompile>
+    <CudaCompile Include="x13\cuda_mul2.cu">
+      <Filter>Source Files\CUDA\x13</Filter>
+    </CudaCompile>
+    <CudaCompile Include="qubit\deep.cu">
+      <Filter>Source Files\CUDA\qubit</Filter>
+    </CudaCompile>
+    <CudaCompile Include="qubit\doom.cu">
+      <Filter>Source Files\CUDA\qubit</Filter>
+    </CudaCompile>
+    <CudaCompile Include="lyra2\cuda_lyra2.cu">
+      <Filter>Source Files\CUDA\lyra2RE</Filter>
+    </CudaCompile>
+    <CudaCompile Include="lyra2\lyra2RE.cu">
+      <Filter>Source Files\CUDA\lyra2RE</Filter>
+    </CudaCompile>
+    <CudaCompile Include="Algo256\cuda_blake256.cu">
+      <Filter>Source Files\CUDA\Algo256</Filter>
+    </CudaCompile>
+    <CudaCompile Include="Algo256\cuda_groestl256.cu">
+      <Filter>Source Files\CUDA\Algo256</Filter>
+    </CudaCompile>
+    <CudaCompile Include="Algo256\cuda_skein256.cu">
+      <Filter>Source Files\CUDA\Algo256</Filter>
+    </CudaCompile>
+    <CudaCompile Include="pluck\cuda_pluck.cu">
+      <Filter>Source Files\CUDA\pluck</Filter>
+    </CudaCompile>
+    <CudaCompile Include="pluck\pluck.cu">
+      <Filter>Source Files\CUDA\pluck</Filter>
+    </CudaCompile>
   </ItemGroup>
 </Project>
\ No newline at end of file
diff --git a/ccminer.vcxproj.user b/ccminer.vcxproj.user
new file mode 100644
index 0000000000..ace9a86acb
--- /dev/null
+++ b/ccminer.vcxproj.user
@@ -0,0 +1,3 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+</Project>
\ No newline at end of file
diff --git a/compat/thrust/CHANGELOG b/compat/thrust/CHANGELOG
deleted file mode 100644
index 110c6689c2..0000000000
--- a/compat/thrust/CHANGELOG
+++ /dev/null
@@ -1,662 +0,0 @@
-#######################################
-#           Thrust v1.7.0             #
-#######################################
-
-Summary
-    Thrust 1.7.0 introduces a new interface for controlling algorithm execution as
-    well as several new algorithms and performance improvements. With this new
-    interface, users may directly control how algorithms execute as well as details
-    such as the allocation of temporary storage. Key/value versions of thrust::merge
-    and the set operation algorithms have been added, as well as stencil versions of
-    partitioning algorithms. thrust::tabulate has been introduced to tabulate the
-    values of functions taking integers. For 32b types, new CUDA merge and set
-    operations provide 2-15x faster performance while a new CUDA comparison sort
-    provides 1.3-4x faster performance. Finally, a new TBB reduce_by_key implementation
-    provides 80% faster performance.
-
-Breaking API Changes
-    Dispatch
-      Custom user backend systems' tag types must now inherit from the corresponding system's execution_policy template (e.g. thrust::cuda::execution_policy) instead
-      of the tag struct (e.g. thrust::cuda::tag). Otherwise, algorithm specializations will silently go unfound during dispatch.
-      See examples/minimal_custom_backend.cu and examples/cuda/fallback_allocator.cu for usage examples.
-
-      thrust::advance and thrust::distance are no longer dispatched based on iterator system type and thus may no longer be customized.
-
-    Iterators
-      iterator_facade and iterator_adaptor's Pointer template parameters have been eliminated.
-      iterator_adaptor has been moved into the thrust namespace (previously thrust::experimental::iterator_adaptor).
-      iterator_facade has been moved into the thrust namespace (previously thrust::experimental::iterator_facade).
-      iterator_core_access has been moved into the thrust namespace (previously thrust::experimental::iterator_core_access).
-      All iterators' nested pointer typedef (the type of the result of operator->) is now void instead of a pointer type to indicate that such expressions are currently impossible.
-      Floating point counting_iterators' nested difference_type typedef is now a signed integral type instead of a floating point type.
-
-    Other
-      normal_distribution has been moved into the thrust::random namespace (previously thrust::random::experimental::normal_distribution).
-      Placeholder expressions may no longer include the comma operator.
-
-New Features
-    Execution Policies
-      Users may directly control the dispatch of algorithm invocations with optional execution policy arguments.
-      For example, instead of wrapping raw pointers allocated by cudaMalloc with thrust::device_ptr, the thrust::device execution_policy may be passed as an argument to an algorithm invocation to enable CUDA execution.
-      The following execution policies are supported in this version:
-
-        thrust::host
-        thrust::device
-        thrust::cpp::par
-        thrust::cuda::par
-        thrust::omp::par
-        thrust::tbb::par
-
-    Algorithms
-	free
-	get_temporary_buffer
-	malloc
-        merge_by_key
-        partition with stencil
-        partition_copy with stencil
-	return_temporary_buffer
-        set_difference_by_key
-        set_intersection_by_key
-        set_symmetric_difference_by_key
-        set_union_by_key
-        stable_partition with stencil
-        stable_partition_copy with stencil
-	tabulate
-
-New Examples
-    uninitialized_vector demonstrates how to use a custom allocator to avoid the automatic initialization of elements in thrust::device_vector.
-
-Other Enhancements
-    Authors of custom backend systems may manipulate arbitrary state during algorithm dispatch by incorporating it into their execution_policy parameter.
-    Users may control the allocation of temporary storage during algorithm execution by passing standard allocators as parameters via execution policies such as thrust::device.
-    THRUST_DEVICE_SYSTEM_CPP has been added as a compile-time target for the device backend. 
-    CUDA merge performance is 2-15x faster.
-    CUDA comparison sort performance is 1.3-4x faster.
-    CUDA set operation performance is 1.5-15x faster.
-    TBB reduce_by_key performance is 80% faster.
-    Several algorithms have been parallelized with TBB.
-    Support for user allocators in vectors has been improved.
-    The sparse_vector example is now implemented with merge_by_key instead of sort_by_key.
-    Warnings have been eliminated in various contexts.
-    Warnings about __host__ or __device__-only functions called from __host__ __device__ functions have been eliminated in various contexts.
-    Documentation about algorithm requirements has been improved.
-    Simplified the minimal_custom_backend example.
-    Simplified the cuda/custom_temporary_allocation example.
-    Simplified the cuda/fallback_allocator example.
-
-Bug Fixes
-    #248 fix broken counting_iterator<float> behavior with OpenMP
-    #231, #209 fix set operation failures with CUDA
-    #187 fix incorrect occupancy calculation with CUDA
-    #153 fix broken multigpu behavior with CUDA
-    #142 eliminate warning produced by thrust::random::taus88 and MSVC 2010
-    #208 correctly initialize elements in temporary storage when necessary
-    #16 fix compilation error when sorting bool with CUDA
-    #10 fix ambiguous overloads of reinterpret_tag
-
-Known Issues
-    g++ versions 4.3 and lower may fail to dispatch thrust::get_temporary_buffer correctly causing infinite recursion in examples such as cuda/custom_temporary_allocation.
-
-Acknowledgments
-    Thanks to Sean Baxter, Bryan Catanzaro, and Manjunath Kudlur for contributing a faster merge implementation for CUDA.
-    Thanks to Sean Baxter for contributing a faster set operation implementation for CUDA.
-    Thanks to Cliff Woolley for contributing a correct occupancy calculation algorithm.
-
-#######################################
-#           Thrust v1.6.0             #
-#######################################
-
-Summary
-    Thrust v1.6.0 provides an interface for customization and extension and a new
-    backend system based on the Threading Building Blocks library. With this
-    new interface, programmers may customize the behavior of specific algorithms
-    as well as control the allocation of temporary storage or invent entirely new
-    backends. These enhancements also allow multiple different backend systems
-    such as CUDA and OpenMP to coexist within a single program. Support for TBB
-    allows Thrust programs to integrate more naturally into applications which
-    may already employ the TBB task scheduler.
-
-Breaking API Changes
-    The header <thrust/experimental/cuda/pinned_allocator.h> has been moved to <thrust/system/cuda/experimental/pinned_allocator.h>
-    thrust::experimental::cuda::pinned_allocator has been moved to thrust::cuda::experimental::pinned_allocator
-    The macro THRUST_DEVICE_BACKEND has been renamed THRUST_DEVICE_SYSTEM
-    The macro THRUST_DEVICE_BACKEND_CUDA has been renamed THRUST_DEVICE_SYSTEM_CUDA
-    The macro THRUST_DEVICE_BACKEND_OMP has been renamed THRUST_DEVICE_SYSTEM_OMP
-    thrust::host_space_tag has been renamed thrust::host_system_tag
-    thrust::device_space_tag has been renamed thrust::device_system_tag
-    thrust::any_space_tag has been renamed thrust::any_system_tag
-    thrust::iterator_space has been renamed thrust::iterator_system
-    
-
-New Features
-    Backend Systems
-        Threading Building Blocks (TBB) is now supported
-    Functions
-        for_each_n
-        raw_reference_cast
-    Types
-        pointer
-        reference
-
-New Examples
-    cuda/custom_temporary_allocation
-    cuda/fallback_allocator
-    device_ptr
-    expand
-    minimal_custom_backend
-    raw_reference_cast
-    set_operations
-
-Other Enhancements
-    thrust::for_each now returns the end of the input range similar to most other algorithms
-    thrust::pair and thrust::tuple have swap functionality
-    all CUDA algorithms now support large data types
-    iterators may be dereferenced in user __device__ or __global__ functions
-    the safe use of different backend systems is now possible within a single binary
-
-Bug Fixes
-    #469 min_element and max_element algorithms no longer require a const comparison operator
-
-Known Issues
-    cudafe++.exe may crash when parsing TBB headers on Windows. 
-
-#######################################
-#           Thrust v1.5.3             #
-#######################################
-
-Summary
-    Small bug fixes
-
-Bug Fixes
-    Avoid warnings about potential race due to __shared__ non-POD variable
-
-#######################################
-#           Thrust v1.5.2             #
-#######################################
-
-Summary
-    Small bug fixes
-
-Bug Fixes
-    Fixed warning about C-style initialization of structures
-
-#######################################
-#           Thrust v1.5.1             #
-#######################################
-
-Summary
-    Small bug fixes
-
-Bug Fixes
-    Sorting data referenced by permutation_iterators on CUDA produces invalid results
-
-#######################################
-#           Thrust v1.5.0             #
-#######################################
-
-Summary
-    Thrust v1.5.0 introduces new programmer productivity and performance
-    enhancements. New functionality for creating anonymous "lambda" functions has
-    been added. A faster host sort provides 2-10x faster performance for sorting
-    arithmetic types on (single-threaded) CPUs. A new OpenMP sort provides
-    2.5x-3.0x speedup over the host sort using a quad-core CPU. When sorting
-    arithmetic types with the OpenMP backend the combined performance improvement
-    is 5.9x for 32-bit integers and ranges from 3.0x (64-bit types) to 14.2x
-    (8-bit types). A new CUDA reduce_by_key implementation provides 2-3x faster
-    performance.
-
-Breaking API Changes
-    device_ptr<void> no longer unsafely converts to device_ptr<T> without an
-    explicit cast. Use the expression
-    device_pointer_cast(static_cast<int*>(void_ptr.get()))
-    to convert, for example, device_ptr<void> to device_ptr<int>.
-
-New Features
-    Functions
-        stencil-less transform_if
-
-    Types
-        lambda placeholders
-
-New Examples
-    lambda
-
-Other Enhancements
-    host sort is 2-10x faster for arithmetic types
-    OMP sort provides speedup over host sort
-    reduce_by_key is 2-3x faster
-    reduce_by_key no longer requires O(N) temporary storage
-    CUDA scan algorithms are 10-40% faster
-    host_vector and device_vector are now documented
-    out-of-memory exceptions now provide detailed information from CUDART
-    improved histogram example
-    device_reference now has a specialized swap
-    reduce_by_key and scan algorithms are compatible with discard_iterator
-
-Removed Functionality
-
-Bug Fixes
-     #44 allow host_vector to compile when value_type uses __align__
-    #198 allow adjacent_difference to permit safe in-situ operation
-    #303 make thrust thread-safe
-    #313 avoid race conditions in device_vector::insert
-    #314 avoid unintended adl invocation when dispatching copy
-    #365 fix merge and set operation failures
-
-Known Issues
-    None
-
-Acknowledgments
-    Thanks to Manjunath Kudlur for contributing his Carbon library, from which the lambda functionality is derived.
-    Thanks to Jean-Francois Bastien for suggesting a fix for issue 303.
-
-#######################################
-#           Thrust v1.4.0             #
-#######################################
-
-Summary
-    Thrust v1.4.0 provides support for CUDA 4.0 in addition to many feature
-    and performance improvements.  New set theoretic algorithms operating on
-    sorted sequences have been added.  Additionally, a new fancy iterator
-    allows discarding redundant or otherwise unnecessary output from
-    algorithms, conserving memory storage and bandwidth.
-
-Breaking API Changes
-    Eliminations
-        thrust/is_sorted.h
-        thrust/utility.h
-        thrust/set_intersection.h
-        thrust/experimental/cuda/ogl_interop_allocator.h and the functionality therein
-        thrust::deprecated::copy_when
-        thrust::deprecated::absolute_value
-
-New Features
-    Functions
-        copy_n
-        merge
-        set_difference
-        set_symmetric_difference
-        set_union
-
-    Types
-        discard_iterator
-
-    Device support
-        Compute Capability 2.1 GPUs
-
-New Examples
-    run_length_decoding
-
-Other Enhancements
-    Compilation warnings are substantially reduced in various contexts.
-    The compilation time of thrust::sort, thrust::stable_sort, thrust::sort_by_key,
-    and thrust::stable_sort_by_key is substantially reduced.
-    A fast sort implementation is used when sorting primitive types with thrust::greater.
-    The performance of thrust::set_intersection is improved.
-    The performance of thrust::fill is improved on SM 1.x devices.
-    A code example is now provided in each algorithm's documentation.
-    thrust::reverse now operates in-place
-
-Removed Functionality
-    thrust::deprecated::copy_when
-    thrust::deprecated::absolute_value
-    thrust::experimental::cuda::ogl_interop_allocator
-    thrust::gather and thrust::scatter from host to device and vice versa are no longer supported.
-    Operations which modify the elements of a thrust::device_vector are no longer
-    available from source code compiled without nvcc when the device backend is CUDA.
-    Instead, use the idiom from the cpp_interop example.
-
-Bug Fixes
-    #212 set_intersection works correctly for large input sizes.
-    #275 counting_iterator and constant_iterator work correctly with OpenMP as the
-    backend when compiling with optimization
-    #256 min and max correctly return their first argument as a tie-breaker
-    #248 NDEBUG is interpreted correctly
-
-Known Issues
-    nvcc may generate code containing warnings when compiling some Thrust algorithms.
-    When compiling with -arch=sm_1x, some Thrust algorithms may cause nvcc to issue
-    benign pointer advisories.
-    When compiling with -arch=sm_1x and -G, some Thrust algorithms may fail to execute correctly.
-    thrust::inclusive_scan, thrust::exclusive_scan, thrust::inclusive_scan_by_key,
-    and thrust::exclusive_scan_by_key are currently incompatible with thrust::discard_iterator.
-
-Acknowledgments
-    Thanks to David Tarjan for improving the performance of set_intersection.
-    Thanks to Duane Merrill for continued help with sort.
-    Thanks to Nathan Whitehead for help with CUDA Toolkit integration.
-
-#######################################
-#           Thrust v1.3.0             #
-#######################################
-
-Summary
-    Thrust v1.3.0 provides support for CUDA 3.2 in addition to many feature
-    and performance enhancements.
-    
-    Performance of the sort and sort_by_key algorithms is improved by as much 
-    as 3x in certain situations.  The performance of stream compaction algorithms,
-    such as copy_if, is improved by as much as 2x.  Reduction performance is 
-    also improved, particularly for small input sizes.
-    
-    CUDA errors are now converted to runtime exceptions using the system_error
-    interface.  Combined with a debug mode, also new in v1.3, runtime errors
-    can be located with greater precision.
-
-    Lastly, a few header files have been consolidated or renamed for clarity.
-    See the deprecations section below for additional details.
-
-
-Breaking API Changes
-    Promotions
-        thrust::experimental::inclusive_segmented_scan has been renamed thrust::inclusive_scan_by_key and exposes a different interface
-        thrust::experimental::exclusive_segmented_scan has been renamed thrust::exclusive_scan_by_key and exposes a different interface
-        thrust::experimental::partition_copy has been renamed thrust::partition_copy and exposes a different interface
-        thrust::next::gather has been renamed thrust::gather
-        thrust::next::gather_if has been renamed thrust::gather_if
-        thrust::unique_copy_by_key has been renamed thrust::unique_by_key_copy
-    Deprecations
-        thrust::copy_when has been renamed thrust::deprecated::copy_when
-        thrust::absolute_value has been renamed thrust::deprecated::absolute_value
-        The header thrust/set_intersection.h is now deprecated; use thrust/set_operations.h instead
-        The header thrust/utility.h is now deprecated; use thrust/swap.h instead
-        The header thrust/swap_ranges.h is now deprecated; use thrust/swap.h instead
-    Eliminations
-        thrust::deprecated::gather
-        thrust::deprecated::gather_if
-        thrust/experimental/arch.h and the functions therein
-        thrust/sorting/merge_sort.h
-        thrust/sorting/radix_sort.h
-
-New Features
-    Functions
-        exclusive_scan_by_key
-        find
-        find_if
-        find_if_not
-        inclusive_scan_by_key
-        is_partitioned
-        is_sorted_until
-        mismatch
-        partition_point
-        reverse
-        reverse_copy
-        stable_partition_copy
-
-    Types
-        system_error and related types
-        experimental::cuda::ogl_interop_allocator
-        bit_and, bit_or, and bit_xor
-
-    Device support
-        gf104-based GPUs
-
-New Examples
-    opengl_interop.cu
-    repeated_range.cu
-    simple_moving_average.cu
-    sparse_vector.cu
-    strided_range.cu
-
-Other Enhancements
-    Performance of thrust::sort and thrust::sort_by_key is substantially improved for primitive key types
-    Performance of thrust::copy_if is substantially improved
-    Performance of thrust::reduce and related reductions is improved
-    THRUST_DEBUG mode added
-    Callers of Thrust functions may detect error conditions by catching thrust::system_error, which derives from std::runtime_error
-    The number of compiler warnings generated by Thrust has been substantially reduced
-    Comparison sort now works correctly for input sizes > 32M
-    min & max usage no longer collides with <windows.h> definitions
-    Compiling against the OpenMP backend no longer requires nvcc
-    Performance of device_vector initialized in .cpp files is substantially improved in common cases
-    Performance of thrust::sort_by_key on the host is substantially improved
-
-Removed Functionality
-    nvcc 2.3 is no longer supported
-
-Bug Fixes
-    Debug device code now compiles correctly
-    thrust::uninitialized_copy and thrust::uninitialized_fill now dispatch constructors on the device rather than the host
-
-Known Issues
-    #212 set_intersection is known to fail for large input sizes
-    partition_point is known to fail for 64b types with nvcc 3.2
-
-Acknowledgments
-    Thanks to Duane Merrill for contributing a fast CUDA radix sort implementation
-    Thanks to Erich Elsen for contributing an implementation of find_if
-    Thanks to Andrew Corrigan for contributing changes which allow the OpenMP backend to compile in the absence of nvcc
-    Thanks to Andrew Corrigan, Cliff Woolley, David Coeurjolly, Janick Martinez Esturo, John Bowers, Maxim Naumov, Michael Garland, and Ryuta Suzuki for bug reports
-    Thanks to Cliff Woolley for help with testing
-
-#######################################
-#           Thrust v1.2.1             #
-#######################################
-
-Summary
-    Small fixes for compatibility with CUDA 3.1
-
-Known Issues
-    inclusive_scan & exclusive_scan may fail with very large types
-    the Microsoft compiler may fail to compile code using both sort and binary search algorithms
-    uninitialized_fill & uninitialized_copy dispatch constructors on the host rather than the device
-    # 109 some algorithms may exhibit poor performance with the OpenMP backend with large numbers (>= 6) of CPU threads
-    default_random_engine::discard is not accelerated with nvcc 2.3
-    nvcc 3.1 may fail to compile code using types derived from thrust::subtract_with_carry_engine, such as thrust::ranlux24 & thrust::ranlux48.
-
-#######################################
-#           Thrust v1.2.0             #
-#######################################
-
-Summary
-    Thrust v1.2 introduces support for compilation to multicore CPUs
-    and the Ocelot virtual machine, and several new facilities for
-    pseudo-random number generation.  New algorithms such as set
-    intersection and segmented reduction have also been added.  Lastly,
-    improvements to the robustness of the CUDA backend ensure
-    correctness across a broad set of (uncommon) use cases.
-
-Breaking API Changes
-    thrust::gather's interface was incorrect and has been removed.
-    The old interface is deprecated but will be preserved for Thrust
-    version 1.2 at thrust::deprecated::gather &
-    thrust::deprecated::gather_if. The new interface is provided at
-    thrust::next::gather & thrust::next::gather_if.  The new interface
-    will be promoted to thrust:: in Thrust version 1.3. For more details,
-    please refer to this thread:
-    http://groups.google.com/group/thrust-users/browse_thread/thread/f5f0583cb97b51fd
-
-    The thrust::sorting namespace has been deprecated in favor of the
-    top-level sorting functions, such as thrust::sort() and
-    thrust::sort_by_key().
-
-New Features
-    Functions
-        reduce_by_key
-        set_intersection
-        tie
-        unique_copy
-        unique_by_key
-        unique_copy_by_key
-
-    Types
-        Random Number Generation
-            discard_block_engine
-            default_random_engine
-            linear_congruential_engine
-            linear_feedback_shift_engine
-            minstd_rand
-            minstd_rand0
-            normal_distribution (experimental)
-            ranlux24
-            ranlux48
-            ranlux24_base
-            ranlux48_base
-            subtract_with_carry_engine
-            taus88
-            uniform_int_distribution
-            uniform_real_distribution
-            xor_combine_engine
-        Functionals
-            project1st
-            project2nd
-
-    Fancy Iterators
-        permutation_iterator
-        reverse_iterator
-
-    Device support
-        Add support for multicore CPUs via OpenMP
-        Add support for Fermi-class GPUs
-        Add support for Ocelot virtual machine
-
-New Examples
-    cpp_integration
-    histogram
-    mode
-    monte_carlo
-    monte_carlo_disjoint_sequences
-    padded_grid_reduction
-    permutation_iterator
-    row_sum
-    run_length_encoding
-    segmented_scan
-    stream_compaction
-    summary_statistics
-    transform_iterator
-    word_count
-
-Other Enhancements
-    vector functions operator!=, rbegin, crbegin, rend, crend, data, & shrink_to_fit
-    integer sorting performance is improved when max is large but (max - min) is small and when min is negative
-    performance of inclusive_scan() and exclusive_scan() is improved by 20-25% for primitive types
-    support for nvcc 3.0
-
-Removed Functionality
-    removed support for equal between host & device sequences
-    removed support for gather() and scatter() between host & device sequences
-
-Bug Fixes
-    # 8 cause a compiler error if the required compiler is not found rather than a mysterious error at link time
-    # 42 device_ptr & device_reference are classes rather than structs, eliminating warnings on certain platforms
-    # 46 gather & scatter handle any space iterators correctly
-    # 51 thrust::experimental::arch functions gracefully handle unrecognized GPUs
-    # 52 avoid collisions with common user macros such as BLOCK_SIZE
-    # 62 provide better documentation for device_reference
-    # 68 allow built-in CUDA vector types to work with device_vector in pure C++ mode
-    # 102 eliminated a race condition in device_vector::erase
-    various compilation warnings eliminated
-
-Known Issues
-   inclusive_scan & exclusive_scan may fail with very large types
-   the Microsoft compiler may fail to compile code using both sort and binary search algorithms
-   uninitialized_fill & uninitialized_copy dispatch constructors on the host rather than the device
-   # 109 some algorithms may exhibit poor performance with the OpenMP backend with large numbers (>= 6) of CPU threads
-   default_random_engine::discard is not accelerated with nvcc 2.3
-
-Acknowledgments
-   Thanks to Gregory Diamos for contributing a CUDA implementation of set_intersection
-   Thanks to Ryuta Suzuki & Gregory Diamos for rigorously testing Thrust's unit tests and examples against Ocelot
-   Thanks to Tom Bradley for contributing an implementation of normal_distribution
-   Thanks to Joseph Rhoads for contributing the example summary_statistics
-
-#######################################
-#           Thrust v1.1.1             #
-#######################################
-
-Summary
-    Small fixes for compatibility with CUDA 2.3a and Mac OSX Snow Leopard.
-
-#######################################
-#           Thrust v1.1.0             #
-#######################################
-
-Summary
-    Thrust v1.1 introduces fancy iterators, binary search functions, and
-    several specialized reduction functions.  Experimental support for
-    segmented scan has also been added.
-
-Breaking API Changes
-    counting_iterator has been moved into the thrust namespace (previously thrust::experimental)
-
-New Features
-    Functions
-        copy_if
-        lower_bound
-        upper_bound
-        vectorized lower_bound
-        vectorized upper_bound
-        equal_range
-        binary_search
-        vectorized binary_search
-        all_of
-        any_of
-        none_of
-        minmax_element
-        advance
-        inclusive_segmented_scan (experimental)
-        exclusive_segmented_scan (experimental)
-
-    Types
-        pair
-        tuple
-        device_malloc_allocator
-
-    Fancy Iterators
-        constant_iterator
-        counting_iterator
-        transform_iterator
-        zip_iterator
-
-New Examples
-    computing the maximum absolute difference between vectors
-    computing the bounding box of a two-dimensional point set
-    sorting multiple arrays together (lexicographical sorting)
-    constructing a summed area table
-    using zip_iterator to mimic an array of structs
-    using constant_iterator to increment array values
-
-Other Enhancements
-    added pinned memory allocator (experimental)
-    added more methods to host_vector & device_vector (issue #4)
-    added variant of remove_if with a stencil argument (issue #29)
-    scan and reduce use cudaFuncGetAttributes to determine grid size
-    exceptions are reported when temporary device arrays cannot be allocated 
-
-Bug Fixes
-     #5 make vector work for larger data types
-     #9 stable_partition_copy doesn't respect OutputIterator concept semantics
-    #10 scans should return OutputIterator
-    #16 make algorithms work for larger data types
-    #27 dispatch radix_sort even when comp=less<T> is explicitly provided
-
-Known Issues
-    Using functors with Thrust entry points may not compile on Mac OSX with gcc-4.0.1
-    uninitialized_copy & uninitialized_fill dispatch constructors on the host rather than the device.
-    inclusive_scan, inclusive_scan_by_key, exclusive_scan, and exclusive_scan_by_key may fail when used with large types with the CUDA 3.1 driver
-
-
-#######################################
-#           Thrust v1.0.0             #
-#######################################
-
-Breaking API changes
-    Rename top level namespace komrade to thrust.
-    Move partition_copy() & stable_partition_copy() into thrust::experimental namespace until we can easily provide the standard interface.
-    Rename range() to sequence() to avoid collision with Boost.Range.
-    Rename copy_if() to copy_when() due to semantic differences with C++0x copy_if().
-
-New Features
-    Add C++0x style cbegin() & cend() methods to host_vector & device_vector.
-    Add transform_if function.
-    Add stencil versions of replace_if() & replace_copy_if().
-    Allow counting_iterator to work with for_each().
-    Allow types with constructors in comparison sort & reduce.
-
-Other Enhancements
-    merge_sort and stable_merge_sort are now 2 to 5x faster when executed on the parallel device.
-
-Bug fixes
-    Workaround an issue where an incremented iterator causes nvcc to crash. (Komrade issue #6)
-    Fix an issue where const_iterators could not be passed to transform. (Komrade issue #7)
-
diff --git a/compat/thrust/adjacent_difference.h b/compat/thrust/adjacent_difference.h
deleted file mode 100644
index 772b5f993f..0000000000
--- a/compat/thrust/adjacent_difference.h
+++ /dev/null
@@ -1,244 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file adjacent_difference.h
- *  \brief Compute difference between consecutive elements of a range
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/execution_policy.h>
-
-namespace thrust
-{
-
-
-/*! \addtogroup transformations Transformations
- *  \{
- */
-
-
-/*! \p adjacent_difference calculates the differences of adjacent elements in the
- *  range <tt>[first, last)</tt>. That is, <tt>\*first</tt> is assigned to
- *  <tt>\*result</tt>, and, for each iterator \p i in the range
- *  <tt>[first + 1, last)</tt>, the difference of <tt>\*i</tt> and <tt>*(i - 1)</tt>
- *  is assigned to <tt>\*(result + (i - first))</tt>.
- *
- *  This version of \p adjacent_difference uses <tt>operator-</tt> to calculate
- *  differences.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the input range.
- *  \param last  The end of the input range.
- *  \param result The beginning of the output range.
- *  \return The iterator <tt>result + (last - first)</tt>
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and if \c x and \c y are objects of \p InputIterator's \c value_type, then \c x - \c y is defined,
- *          and \p InputIterator's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types,
- *          and the return type of <tt>x - y</tt> is convertible to a type in \p OutputIterator's set of \c value_types.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *
- *  \remark Note that \p result is permitted to be the same iterator as \p first. This is
- *          useful for computing differences "in place".
- *
- *  The following code snippet demonstrates how to use \p adjacent_difference to compute
- *  the difference between adjacent elements of a range using the \p thrust::device execution policy:
- *
- *  \code
- *  #include <thrust/adjacent_difference.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  int h_data[8] = {1, 2, 1, 2, 1, 2, 1, 2};
- *  thrust::device_vector<int> d_data(h_data, h_data + 8);
- *  thrust::device_vector<int> d_result(8);
- *
- *  thrust::adjacent_difference(thrust::device, d_data.begin(), d_data.end(), d_result.begin());
- *
- *  // d_result is now [1, 1, -1, 1, -1, 1, -1, 1]
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/adjacent_difference.html
- *  \see inclusive_scan
- */
-template<typename DerivedPolicy, typename InputIterator, typename OutputIterator>
-OutputIterator adjacent_difference(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                   InputIterator first, InputIterator last, 
-                                   OutputIterator result);
-
-/*! \p adjacent_difference calculates the differences of adjacent elements in the
- *  range <tt>[first, last)</tt>. That is, <tt>*first</tt> is assigned to
- *  <tt>\*result</tt>, and, for each iterator \p i in the range
- *  <tt>[first + 1, last)</tt>, <tt>binary_op(\*i, \*(i - 1))</tt> is assigned to
- *  <tt>\*(result + (i - first))</tt>.
- *  
- *  This version of \p adjacent_difference uses the binary function \p binary_op to
- *  calculate differences.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the input range.
- *  \param last  The end of the input range.
- *  \param result The beginning of the output range.
- *  \param binary_op The binary function used to compute differences.
- *  \return The iterator <tt>result + (last - first)</tt>
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator's \c value_type is convertible to \p BinaryFunction's \c first_argument_type and \c second_argument_type,
- *          and \p InputIterator's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam BinaryFunction's \c result_type is convertible to a type in \p OutputIterator's set of \c value_types.
- *
- *  \remark Note that \p result is permitted to be the same iterator as \p first. This is
- *          useful for computing differences "in place".
- *
- *  The following code snippet demonstrates how to use \p adjacent_difference to compute
- *  the sum between adjacent elements of a range using the \p thrust::device execution policy:
- *
- *  \code
- *  #include <thrust/adjacent_difference.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  int h_data[8] = {1, 2, 1, 2, 1, 2, 1, 2};
- *  thrust::device_vector<int> d_data(h_data, h_data + 8);
- *  thrust::device_vector<int> d_result(8);
- *
- *  thrust::adjacent_difference(thrust::device, d_data.begin(), d_data.end(), d_result.begin(), thrust::plus<int>());
- *
- *  // d_result is now [1, 3, 3, 3, 3, 3, 3, 3]
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/adjacent_difference.html
- *  \see inclusive_scan
- */
-template<typename DerivedPolicy, typename InputIterator, typename OutputIterator, typename BinaryFunction>
-OutputIterator adjacent_difference(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                   InputIterator first, InputIterator last,
-                                   OutputIterator result,
-                                   BinaryFunction binary_op);
-
-/*! \p adjacent_difference calculates the differences of adjacent elements in the
- *  range <tt>[first, last)</tt>. That is, <tt>\*first</tt> is assigned to
- *  <tt>\*result</tt>, and, for each iterator \p i in the range
- *  <tt>[first + 1, last)</tt>, the difference of <tt>\*i</tt> and <tt>*(i - 1)</tt>
- *  is assigned to <tt>\*(result + (i - first))</tt>.
- *
- *  This version of \p adjacent_difference uses <tt>operator-</tt> to calculate
- *  differences.
- *
- *  \param first The beginning of the input range.
- *  \param last  The end of the input range.
- *  \param result The beginning of the output range.
- *  \return The iterator <tt>result + (last - first)</tt>
- *
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and if \c x and \c y are objects of \p InputIterator's \c value_type, then \c x - \c y is defined,
- *          and \p InputIterator's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types,
- *          and the return type of <tt>x - y</tt> is convertible to a type in \p OutputIterator's set of \c value_types.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *
- *  \remark Note that \p result is permitted to be the same iterator as \p first. This is
- *          useful for computing differences "in place".
- *
- *  The following code snippet demonstrates how to use \p adjacent_difference to compute
- *  the difference between adjacent elements of a range.
- *
- *  \code
- *  #include <thrust/adjacent_difference.h>
- *  #include <thrust/device_vector.h>
- *  ...
- *  int h_data[8] = {1, 2, 1, 2, 1, 2, 1, 2};
- *  thrust::device_vector<int> d_data(h_data, h_data + 8);
- *  thrust::device_vector<int> d_result(8);
- *
- *  thrust::adjacent_difference(d_data.begin(), d_data.end(), d_result.begin());
- *
- *  // d_result is now [1, 1, -1, 1, -1, 1, -1, 1]
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/adjacent_difference.html
- *  \see inclusive_scan
- */
-template <typename InputIterator, typename OutputIterator>
-OutputIterator adjacent_difference(InputIterator first, InputIterator last, 
-                                   OutputIterator result);
-
-/*! \p adjacent_difference calculates the differences of adjacent elements in the
- *  range <tt>[first, last)</tt>. That is, <tt>*first</tt> is assigned to
- *  <tt>\*result</tt>, and, for each iterator \p i in the range
- *  <tt>[first + 1, last)</tt>, <tt>binary_op(\*i, \*(i - 1))</tt> is assigned to
- *  <tt>\*(result + (i - first))</tt>.
- *  
- *  This version of \p adjacent_difference uses the binary function \p binary_op to
- *  calculate differences.
- *
- *  \param first The beginning of the input range.
- *  \param last  The end of the input range.
- *  \param result The beginning of the output range.
- *  \param binary_op The binary function used to compute differences.
- *  \return The iterator <tt>result + (last - first)</tt>
- *
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator's \c value_type is convertible to \p BinaryFunction's \c first_argument_type and \c second_argument_type,
- *          and \p InputIterator's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam BinaryFunction's \c result_type is convertible to a type in \p OutputIterator's set of \c value_types.
- *
- *  \remark Note that \p result is permitted to be the same iterator as \p first. This is
- *          useful for computing differences "in place".
- *
- *  The following code snippet demonstrates how to use \p adjacent_difference to compute
- *  the sum between adjacent elements of a range.
- *
- *  \code
- *  #include <thrust/adjacent_difference.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/device_vector.h>
- *  ...
- *  int h_data[8] = {1, 2, 1, 2, 1, 2, 1, 2};
- *  thrust::device_vector<int> d_data(h_data, h_data + 8);
- *  thrust::device_vector<int> d_result(8);
- *
- *  thrust::adjacent_difference(d_data.begin(), d_data.end(), d_result.begin(), thrust::plus<int>());
- *
- *  // d_result is now [1, 3, 3, 3, 3, 3, 3, 3]
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/adjacent_difference.html
- *  \see inclusive_scan
- */
-template <typename InputIterator, typename OutputIterator, typename BinaryFunction>
-OutputIterator adjacent_difference(InputIterator first, InputIterator last,
-                                   OutputIterator result,
-                                   BinaryFunction binary_op);
-
-/*! \}
- */
-
-} // end namespace thrust
-
-#include <thrust/detail/adjacent_difference.inl>
-
diff --git a/compat/thrust/advance.h b/compat/thrust/advance.h
deleted file mode 100644
index e7f60b0d54..0000000000
--- a/compat/thrust/advance.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file advance.h
- *  \brief Advance an iterator by a given distance.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-namespace thrust
-{
-
-
-/*! \addtogroup iterators
- *  \{
- */
-
-/*! \p advance(i, n) increments the iterator \p i by the distance \p n. 
- *  If <tt>n > 0</tt> it is equivalent to executing <tt>++i</tt> \p n
- *  times, and if <tt>n < 0</tt> it is equivalent to executing <tt>--i</tt>
- *  <tt>-n</tt> times. If <tt>n == 0</tt>, the call has no effect.
- *
- *  \param i The iterator to be advanced.
- *  \param n The distance by which to advance the iterator.
- *
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>.
- *  \tparam Distance is an integral type that is convertible to \p InputIterator's distance type. 
- *
- *  \pre \p n shall be negative only for bidirectional and random access iterators.
- *
- *  The following code snippet demonstrates how to use \p advance to increment
- *  an iterator a given number of times.
- *
- *  \code
- *  #include <thrust/advance.h>
- *  #include <thrust/device_vector.h>
- *  ...
- *  thrust::device_vector<int> vec(13);
- *  thrust::device_vector<int>::iterator iter = vec.begin();
- *
- *  thrust::advance(iter, 7);
- *
- *  // iter - vec.begin() == 7
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/advance.html
- */
-template <typename InputIterator, typename Distance>
-void advance(InputIterator& i, Distance n);
-
-/*! \} // end iterators
- */
-
-} // end thrust
-
-#include <thrust/detail/advance.inl>
-
diff --git a/compat/thrust/binary_search.h b/compat/thrust/binary_search.h
deleted file mode 100644
index d2ac5a621e..0000000000
--- a/compat/thrust/binary_search.h
+++ /dev/null
@@ -1,1888 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file binary_search.h
- *  \brief Search for values in sorted ranges.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/execution_policy.h>
-#include <thrust/pair.h>
-
-namespace thrust
-{
-
-    
-/*! \addtogroup algorithms
- */
-
-
-/*! \addtogroup searching
- *  \ingroup algorithms
- *  \{
- */
-
-
-/*! \addtogroup binary_search Binary Search
- *  \ingroup searching
- *  \{
- */
-
-
-//////////////////////   
-// Scalar Functions //
-//////////////////////
-
-
-/*! \p lower_bound is a version of binary search: it attempts to find
- * the element value in an ordered range <tt>[first, last)</tt>. 
- * Specifically, it returns the first position where value could be
- * inserted without violating the ordering. This version of 
- * \p lower_bound uses <tt>operator<</tt> for comparison and returns
- * the furthermost iterator \c i in <tt>[first, last)</tt> such that,
- * for every iterator \c j in <tt>[first, i)</tt>, <tt>*j < value</tt>. 
- *
- * The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the ordered sequence.
- *  \param last The end of the ordered sequence.
- *  \param value The value to be searched.
- *  \return The furthermost iterator \c i, such that <tt>*i < value</tt>.
- * 
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>.
- *  \tparam LessThanComparable is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThanComparable</a>. 
- *
- *  The following code snippet demonstrates how to use \p lower_bound
- *  to search for values in an ordered range using the \p thrust::device execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/binary_search.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  thrust::device_vector<int> input(5);
- *
- *  input[0] = 0;
- *  input[1] = 2;
- *  input[2] = 5;
- *  input[3] = 7;
- *  input[4] = 8;
- *
- *  thrust::lower_bound(thrust::device, input.begin(), input.end(), 0); // returns input.begin()
- *  thrust::lower_bound(thrust::device, input.begin(), input.end(), 1); // returns input.begin() + 1
- *  thrust::lower_bound(thrust::device, input.begin(), input.end(), 2); // returns input.begin() + 1
- *  thrust::lower_bound(thrust::device, input.begin(), input.end(), 3); // returns input.begin() + 2
- *  thrust::lower_bound(thrust::device, input.begin(), input.end(), 8); // returns input.begin() + 4
- *  thrust::lower_bound(thrust::device, input.begin(), input.end(), 9); // returns input.end()
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/lower_bound.html
- *  \see \p upper_bound
- *  \see \p equal_range
- *  \see \p binary_search
- */
-template<typename DerivedPolicy, typename ForwardIterator, typename LessThanComparable>
-ForwardIterator lower_bound(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                            ForwardIterator first,
-                            ForwardIterator last,
-                            const LessThanComparable &value);
-
-
-/*! \p lower_bound is a version of binary search: it attempts to find
- * the element value in an ordered range <tt>[first, last)</tt>. 
- * Specifically, it returns the first position where value could be
- * inserted without violating the ordering. This version of 
- * \p lower_bound uses <tt>operator<</tt> for comparison and returns
- * the furthermost iterator \c i in <tt>[first, last)</tt> such that,
- * for every iterator \c j in <tt>[first, i)</tt>, <tt>*j < value</tt>. 
- *
- *  \param first The beginning of the ordered sequence.
- *  \param last The end of the ordered sequence.
- *  \param value The value to be searched.
- *  \return The furthermost iterator \c i, such that <tt>*i < value</tt>.
- * 
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>.
- *  \tparam LessThanComparable is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThanComparable</a>. 
- *
- *  The following code snippet demonstrates how to use \p lower_bound
- *  to search for values in an ordered range.
- *
- *  \code
- *  #include <thrust/binary_search.h>
- *  #include <thrust/device_vector.h>
- *  ...
- *  thrust::device_vector<int> input(5);
- *
- *  input[0] = 0;
- *  input[1] = 2;
- *  input[2] = 5;
- *  input[3] = 7;
- *  input[4] = 8;
- *
- *  thrust::lower_bound(input.begin(), input.end(), 0); // returns input.begin()
- *  thrust::lower_bound(input.begin(), input.end(), 1); // returns input.begin() + 1
- *  thrust::lower_bound(input.begin(), input.end(), 2); // returns input.begin() + 1
- *  thrust::lower_bound(input.begin(), input.end(), 3); // returns input.begin() + 2
- *  thrust::lower_bound(input.begin(), input.end(), 8); // returns input.begin() + 4
- *  thrust::lower_bound(input.begin(), input.end(), 9); // returns input.end()
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/lower_bound.html
- *  \see \p upper_bound
- *  \see \p equal_range
- *  \see \p binary_search
- */
-template <class ForwardIterator, class LessThanComparable>
-ForwardIterator lower_bound(ForwardIterator first, 
-                            ForwardIterator last,
-                            const LessThanComparable& value);
-
-
-/*! \p lower_bound is a version of binary search: it attempts to find
- * the element value in an ordered range <tt>[first, last)</tt>. 
- * Specifically, it returns the first position where value could be
- * inserted without violating the ordering. This version of 
- * \p lower_bound uses function object \c comp for comparison 
- * and returns the furthermost iterator \c i in <tt>[first, last)</tt>
- * such that, for every iterator \c j in <tt>[first, i)</tt>, 
- * <tt>comp(*j, value)</tt> is \c true. 
- *
- * The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the ordered sequence.
- *  \param last The end of the ordered sequence.
- *  \param value The value to be searched.
- *  \param comp The comparison operator.
- *  \return The furthermost iterator \c i, such that <tt>comp(*i, value)</tt> is \c true.
- * 
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>.
- *  \tparam T is comparable to \p ForwardIterator's \c value_type.
- *  \tparam StrictWeakOrdering is a model of <a href="http://www.sgi.com/tech/stl/StrictWeakOrdering.html">Strict Weak Ordering</a>.
- *
- *  The following code snippet demonstrates how to use \p lower_bound
- *  to search for values in an ordered range using the \p thrust::device execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/binary_search.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  thrust::device_vector<int> input(5);
- *
- *  input[0] = 0;
- *  input[1] = 2;
- *  input[2] = 5;
- *  input[3] = 7;
- *  input[4] = 8;
- *
- *  thrust::lower_bound(thrust::device, input.begin(), input.end(), 0, thrust::less<int>()); // returns input.begin()
- *  thrust::lower_bound(thrust::device, input.begin(), input.end(), 1, thrust::less<int>()); // returns input.begin() + 1
- *  thrust::lower_bound(thrust::device, input.begin(), input.end(), 2, thrust::less<int>()); // returns input.begin() + 1
- *  thrust::lower_bound(thrust::device, input.begin(), input.end(), 3, thrust::less<int>()); // returns input.begin() + 2
- *  thrust::lower_bound(thrust::device, input.begin(), input.end(), 8, thrust::less<int>()); // returns input.begin() + 4
- *  thrust::lower_bound(thrust::device, input.begin(), input.end(), 9, thrust::less<int>()); // returns input.end()
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/lower_bound.html
- *  \see \p upper_bound
- *  \see \p equal_range
- *  \see \p binary_search
- */
-template<typename DerivedPolicy, typename ForwardIterator, typename T, typename StrictWeakOrdering>
-ForwardIterator lower_bound(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                            ForwardIterator first,
-                            ForwardIterator last,
-                            const T &value,
-                            StrictWeakOrdering comp);
-
-
-/*! \p lower_bound is a version of binary search: it attempts to find
- * the element value in an ordered range <tt>[first, last)</tt>. 
- * Specifically, it returns the first position where value could be
- * inserted without violating the ordering. This version of 
- * \p lower_bound uses function object \c comp for comparison 
- * and returns the furthermost iterator \c i in <tt>[first, last)</tt>
- * such that, for every iterator \c j in <tt>[first, i)</tt>, 
- * <tt>comp(*j, value)</tt> is \c true. 
- *
- *  \param first The beginning of the ordered sequence.
- *  \param last The end of the ordered sequence.
- *  \param value The value to be searched.
- *  \param comp The comparison operator.
- *  \return The furthermost iterator \c i, such that <tt>comp(*i, value)</tt> is \c true.
- * 
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>.
- *  \tparam T is comparable to \p ForwardIterator's \c value_type.
- *  \tparam StrictWeakOrdering is a model of <a href="http://www.sgi.com/tech/stl/StrictWeakOrdering.html">Strict Weak Ordering</a>.
- *
- *  The following code snippet demonstrates how to use \p lower_bound
- *  to search for values in an ordered range.
- *
- *  \code
- *  #include <thrust/binary_search.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/functional.h>
- *  ...
- *  thrust::device_vector<int> input(5);
- *
- *  input[0] = 0;
- *  input[1] = 2;
- *  input[2] = 5;
- *  input[3] = 7;
- *  input[4] = 8;
- *
- *  thrust::lower_bound(input.begin(), input.end(), 0, thrust::less<int>()); // returns input.begin()
- *  thrust::lower_bound(input.begin(), input.end(), 1, thrust::less<int>()); // returns input.begin() + 1
- *  thrust::lower_bound(input.begin(), input.end(), 2, thrust::less<int>()); // returns input.begin() + 1
- *  thrust::lower_bound(input.begin(), input.end(), 3, thrust::less<int>()); // returns input.begin() + 2
- *  thrust::lower_bound(input.begin(), input.end(), 8, thrust::less<int>()); // returns input.begin() + 4
- *  thrust::lower_bound(input.begin(), input.end(), 9, thrust::less<int>()); // returns input.end()
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/lower_bound.html
- *  \see \p upper_bound
- *  \see \p equal_range
- *  \see \p binary_search
- */
-template <class ForwardIterator, class T, class StrictWeakOrdering>
-ForwardIterator lower_bound(ForwardIterator first,
-                            ForwardIterator last,
-                            const T& value, 
-                            StrictWeakOrdering comp);
-
-
-/*! \p upper_bound is a version of binary search: it attempts to find
- * the element value in an ordered range <tt>[first, last)</tt>. 
- * Specifically, it returns the last position where value could be
- * inserted without violating the ordering. This version of 
- * \p upper_bound uses <tt>operator<</tt> for comparison and returns
- * the furthermost iterator \c i in <tt>[first, last)</tt> such that,
- * for every iterator \c j in <tt>[first, i)</tt>, <tt>value < *j</tt>
- * is \c false.
- *
- * The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the ordered sequence.
- *  \param last The end of the ordered sequence.
- *  \param value The value to be searched.
- *  \return The furthermost iterator \c i, such that <tt>value < *i</tt> is \c false.
- * 
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>.
- *  \tparam LessThanComparable is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThanComparable</a>. 
- *
- *  The following code snippet demonstrates how to use \p upper_bound
- *  to search for values in an ordered range using the \p thrust::device execution policy for parallelism:
- *
- *  \code
- *  #include <thrust/binary_search.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  thrust::device_vector<int> input(5);
- *
- *  input[0] = 0;
- *  input[1] = 2;
- *  input[2] = 5;
- *  input[3] = 7;
- *  input[4] = 8;
- *
- *  thrust::upper_bound(thrust::device, input.begin(), input.end(), 0); // returns input.begin() + 1
- *  thrust::upper_bound(thrust::device, input.begin(), input.end(), 1); // returns input.begin() + 1
- *  thrust::upper_bound(thrust::device, input.begin(), input.end(), 2); // returns input.begin() + 2
- *  thrust::upper_bound(thrust::device, input.begin(), input.end(), 3); // returns input.begin() + 2
- *  thrust::upper_bound(thrust::device, input.begin(), input.end(), 8); // returns input.end()
- *  thrust::upper_bound(thrust::device, input.begin(), input.end(), 9); // returns input.end()
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/upper_bound.html
- *  \see \p lower_bound
- *  \see \p equal_range
- *  \see \p binary_search
- */
-template<typename DerivedPolicy, typename ForwardIterator, typename LessThanComparable>
-ForwardIterator upper_bound(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                            ForwardIterator first,
-                            ForwardIterator last,
-                            const LessThanComparable &value);
-
-
-/*! \p upper_bound is a version of binary search: it attempts to find
- * the element value in an ordered range <tt>[first, last)</tt>. 
- * Specifically, it returns the last position where value could be
- * inserted without violating the ordering. This version of 
- * \p upper_bound uses <tt>operator<</tt> for comparison and returns
- * the furthermost iterator \c i in <tt>[first, last)</tt> such that,
- * for every iterator \c j in <tt>[first, i)</tt>, <tt>value < *j</tt>
- * is \c false.
- *
- *  \param first The beginning of the ordered sequence.
- *  \param last The end of the ordered sequence.
- *  \param value The value to be searched.
- *  \return The furthermost iterator \c i, such that <tt>value < *i</tt> is \c false.
- * 
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>.
- *  \tparam LessThanComparable is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThanComparable</a>. 
- *
- *  The following code snippet demonstrates how to use \p upper_bound
- *  to search for values in an ordered range.
- *
- *  \code
- *  #include <thrust/binary_search.h>
- *  #include <thrust/device_vector.h>
- *  ...
- *  thrust::device_vector<int> input(5);
- *
- *  input[0] = 0;
- *  input[1] = 2;
- *  input[2] = 5;
- *  input[3] = 7;
- *  input[4] = 8;
- *
- *  thrust::upper_bound(input.begin(), input.end(), 0); // returns input.begin() + 1
- *  thrust::upper_bound(input.begin(), input.end(), 1); // returns input.begin() + 1
- *  thrust::upper_bound(input.begin(), input.end(), 2); // returns input.begin() + 2
- *  thrust::upper_bound(input.begin(), input.end(), 3); // returns input.begin() + 2
- *  thrust::upper_bound(input.begin(), input.end(), 8); // returns input.end()
- *  thrust::upper_bound(input.begin(), input.end(), 9); // returns input.end()
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/upper_bound.html
- *  \see \p lower_bound
- *  \see \p equal_range
- *  \see \p binary_search
- */
-template <class ForwardIterator, class LessThanComparable>
-ForwardIterator upper_bound(ForwardIterator first, 
-                            ForwardIterator last,
-                            const LessThanComparable& value);
-
-
-/*! \p upper_bound is a version of binary search: it attempts to find
- * the element value in an ordered range <tt>[first, last)</tt>. 
- * Specifically, it returns the last position where value could be
- * inserted without violating the ordering. This version of 
- * \p upper_bound uses function object \c comp for comparison and returns
- * the furthermost iterator \c i in <tt>[first, last)</tt> such that,
- * for every iterator \c j in <tt>[first, i)</tt>, <tt>comp(value, *j)</tt>
- * is \c false.
- *
- * The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the ordered sequence.
- *  \param last The end of the ordered sequence.
- *  \param value The value to be searched.
- *  \param comp The comparison operator.
- *  \return The furthermost iterator \c i, such that <tt>comp(value, *i)</tt> is \c false.
- * 
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator">Forward Iterator</a>.
- *  \tparam T is comparable to \p ForwardIterator's \c value_type.
- *  \tparam StrictWeakOrdering is a model of <a href="http://www.sgi.com/tech/stl/StrictWeakOrdering.html">Strict Weak Ordering</a>.
- *
- *  The following code snippet demonstrates how to use \p upper_bound
- *  to search for values in an ordered range using the \p thrust::device execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/binary_search.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  thrust::device_vector<int> input(5);
- *
- *  input[0] = 0;
- *  input[1] = 2;
- *  input[2] = 5;
- *  input[3] = 7;
- *  input[4] = 8;
- *
- *  thrust::upper_bound(thrust::device, input.begin(), input.end(), 0, thrust::less<int>()); // returns input.begin() + 1
- *  thrust::upper_bound(thrust::device, input.begin(), input.end(), 1, thrust::less<int>()); // returns input.begin() + 1
- *  thrust::upper_bound(thrust::device, input.begin(), input.end(), 2, thrust::less<int>()); // returns input.begin() + 2
- *  thrust::upper_bound(thrust::device, input.begin(), input.end(), 3, thrust::less<int>()); // returns input.begin() + 2
- *  thrust::upper_bound(thrust::device, input.begin(), input.end(), 8, thrust::less<int>()); // returns input.end()
- *  thrust::upper_bound(thrust::device, input.begin(), input.end(), 9, thrust::less<int>()); // returns input.end()
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/upper_bound.html
- *  \see \p lower_bound
- *  \see \p equal_range
- *  \see \p binary_search
- */
-template<typename DerivedPolicy, typename ForwardIterator, typename T, typename StrictWeakOrdering>
-ForwardIterator upper_bound(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                            ForwardIterator first,
-                            ForwardIterator last,
-                            const T &value,
-                            StrictWeakOrdering comp);
-
-/*! \p upper_bound is a version of binary search: it attempts to find
- * the element value in an ordered range <tt>[first, last)</tt>. 
- * Specifically, it returns the last position where value could be
- * inserted without violating the ordering. This version of 
- * \p upper_bound uses function object \c comp for comparison and returns
- * the furthermost iterator \c i in <tt>[first, last)</tt> such that,
- * for every iterator \c j in <tt>[first, i)</tt>, <tt>comp(value, *j)</tt>
- * is \c false.
- *
- *  \param first The beginning of the ordered sequence.
- *  \param last The end of the ordered sequence.
- *  \param value The value to be searched.
- *  \param comp The comparison operator.
- *  \return The furthermost iterator \c i, such that <tt>comp(value, *j)</tt> is \c false for every iterator \c j in <tt>[first, i)</tt>.
- * 
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator">Forward Iterator</a>.
- *  \tparam T is comparable to \p ForwardIterator's \c value_type.
- *  \tparam StrictWeakOrdering is a model of <a href="http://www.sgi.com/tech/stl/StrictWeakOrdering.html">Strict Weak Ordering</a>.
- *
- *  The following code snippet demonstrates how to use \p upper_bound
- *  to search for values in an ordered range.
- *
- *  \code
- *  #include <thrust/binary_search.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/functional.h>
- *  ...
- *  thrust::device_vector<int> input(5);
- *
- *  input[0] = 0;
- *  input[1] = 2;
- *  input[2] = 5;
- *  input[3] = 7;
- *  input[4] = 8;
- *
- *  thrust::upper_bound(input.begin(), input.end(), 0, thrust::less<int>()); // returns input.begin() + 1
- *  thrust::upper_bound(input.begin(), input.end(), 1, thrust::less<int>()); // returns input.begin() + 1
- *  thrust::upper_bound(input.begin(), input.end(), 2, thrust::less<int>()); // returns input.begin() + 2
- *  thrust::upper_bound(input.begin(), input.end(), 3, thrust::less<int>()); // returns input.begin() + 2
- *  thrust::upper_bound(input.begin(), input.end(), 8, thrust::less<int>()); // returns input.end()
- *  thrust::upper_bound(input.begin(), input.end(), 9, thrust::less<int>()); // returns input.end()
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/upper_bound.html
- *  \see \p lower_bound
- *  \see \p equal_range
- *  \see \p binary_search
- */
-template <class ForwardIterator, class T, class StrictWeakOrdering>
-ForwardIterator upper_bound(ForwardIterator first,
-                            ForwardIterator last,
-                            const T& value, 
-                            StrictWeakOrdering comp);
-
-
-/*! \p binary_search is a version of binary search: it attempts to find
- * the element value in an ordered range <tt>[first, last)</tt>. 
- * It returns \c true if an element that is equivalent to \c value 
- * is present in <tt>[first, last)</tt> and \c false if no such element
- * exists.  Specifically, this version returns \c true if and only if 
- * there exists an iterator \c i in <tt>[first, last)</tt> such that 
- * <tt>*i < value</tt> and <tt>value < *i</tt> are both \c false.
- *
- * The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the ordered sequence.
- *  \param last The end of the ordered sequence.
- *  \param value The value to be searched.
- *  \return \c true if an equivalent element exists in <tt>[first, last)</tt>, otherwise \c false.
- * 
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator">Forward Iterator</a>.
- *  \tparam LessThanComparable is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThanComparable</a>. 
- *
- *  The following code snippet demonstrates how to use \p binary_search
- *  to search for values in an ordered range using the \p thrust::device execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/binary_search.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  thrust::device_vector<int> input(5);
- *
- *  input[0] = 0;
- *  input[1] = 2;
- *  input[2] = 5;
- *  input[3] = 7;
- *  input[4] = 8;
- *
- *  thrust::binary_search(thrust::device, input.begin(), input.end(), 0); // returns true
- *  thrust::binary_search(thrust::device, input.begin(), input.end(), 1); // returns false
- *  thrust::binary_search(thrust::device, input.begin(), input.end(), 2); // returns true
- *  thrust::binary_search(thrust::device, input.begin(), input.end(), 3); // returns false
- *  thrust::binary_search(thrust::device, input.begin(), input.end(), 8); // returns true
- *  thrust::binary_search(thrust::device, input.begin(), input.end(), 9); // returns false
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/binary_search.html
- *  \see \p lower_bound
- *  \see \p upper_bound
- *  \see \p equal_range
- */
-template <typename DerivedPolicy, typename ForwardIterator, typename LessThanComparable>
-bool binary_search(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                   ForwardIterator first, 
-                   ForwardIterator last,
-                   const LessThanComparable& value);
-
-
-/*! \p binary_search is a version of binary search: it attempts to find
- * the element value in an ordered range <tt>[first, last)</tt>. 
- * It returns \c true if an element that is equivalent to \c value 
- * is present in <tt>[first, last)</tt> and \c false if no such element
- * exists.  Specifically, this version returns \c true if and only if 
- * there exists an iterator \c i in <tt>[first, last)</tt> such that 
- * <tt>*i < value</tt> and <tt>value < *i</tt> are both \c false.
- *
- *  \param first The beginning of the ordered sequence.
- *  \param last The end of the ordered sequence.
- *  \param value The value to be searched.
- *  \return \c true if an equivalent element exists in <tt>[first, last)</tt>, otherwise \c false.
- * 
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator">Forward Iterator</a>.
- *  \tparam LessThanComparable is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThanComparable</a>. 
- *
- *  The following code snippet demonstrates how to use \p binary_search
- *  to search for values in an ordered range.
- *
- *  \code
- *  #include <thrust/binary_search.h>
- *  #include <thrust/device_vector.h>
- *  ...
- *  thrust::device_vector<int> input(5);
- *
- *  input[0] = 0;
- *  input[1] = 2;
- *  input[2] = 5;
- *  input[3] = 7;
- *  input[4] = 8;
- *
- *  thrust::binary_search(input.begin(), input.end(), 0); // returns true
- *  thrust::binary_search(input.begin(), input.end(), 1); // returns false
- *  thrust::binary_search(input.begin(), input.end(), 2); // returns true
- *  thrust::binary_search(input.begin(), input.end(), 3); // returns false
- *  thrust::binary_search(input.begin(), input.end(), 8); // returns true
- *  thrust::binary_search(input.begin(), input.end(), 9); // returns false
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/binary_search.html
- *  \see \p lower_bound
- *  \see \p upper_bound
- *  \see \p equal_range
- */
-template <class ForwardIterator, class LessThanComparable>
-bool binary_search(ForwardIterator first, 
-                   ForwardIterator last,
-                   const LessThanComparable& value);
-
-
-/*! \p binary_search is a version of binary search: it attempts to find
- * the element value in an ordered range <tt>[first, last)</tt>. 
- * It returns \c true if an element that is equivalent to \c value 
- * is present in <tt>[first, last)</tt> and \c false if no such element
- * exists.  Specifically, this version returns \c true if and only if 
- * there exists an iterator \c i in <tt>[first, last)</tt> such that 
- * <tt>comp(*i, value)</tt> and <tt>comp(value, *i)</tt> are both \c false.
- *
- * The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the ordered sequence.
- *  \param last The end of the ordered sequence.
- *  \param value The value to be searched.
- *  \param comp The comparison operator.
- *  \return \c true if an equivalent element exists in <tt>[first, last)</tt>, otherwise \c false.
- * 
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator">Forward Iterator</a>.
- *  \tparam T is comparable to \p ForwardIterator's \c value_type.
- *  \tparam StrictWeakOrdering is a model of <a href="http://www.sgi.com/tech/stl/StrictWeakOrdering.html">Strict Weak Ordering</a>.
- *
- *  The following code snippet demonstrates how to use \p binary_search
- *  to search for values in an ordered range using the \p thrust::device execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/binary_search.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  thrust::device_vector<int> input(5);
- *
- *  input[0] = 0;
- *  input[1] = 2;
- *  input[2] = 5;
- *  input[3] = 7;
- *  input[4] = 8;
- *
- *  thrust::binary_search(thrust::device, input.begin(), input.end(), 0, thrust::less<int>()); // returns true
- *  thrust::binary_search(thrust::device, input.begin(), input.end(), 1, thrust::less<int>()); // returns false
- *  thrust::binary_search(thrust::device, input.begin(), input.end(), 2, thrust::less<int>()); // returns true
- *  thrust::binary_search(thrust::device, input.begin(), input.end(), 3, thrust::less<int>()); // returns false
- *  thrust::binary_search(thrust::device, input.begin(), input.end(), 8, thrust::less<int>()); // returns true
- *  thrust::binary_search(thrust::device, input.begin(), input.end(), 9, thrust::less<int>()); // returns false
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/binary_search.html
- *  \see \p lower_bound
- *  \see \p upper_bound
- *  \see \p equal_range
- */
-template <typename DerivedPolicy, typename ForwardIterator, typename T, typename StrictWeakOrdering>
-bool binary_search(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                   ForwardIterator first,
-                   ForwardIterator last,
-                   const T& value, 
-                   StrictWeakOrdering comp);
-
-
-/*! \p binary_search is a version of binary search: it attempts to find
- * the element value in an ordered range <tt>[first, last)</tt>. 
- * It returns \c true if an element that is equivalent to \c value 
- * is present in <tt>[first, last)</tt> and \c false if no such element
- * exists.  Specifically, this version returns \c true if and only if 
- * there exists an iterator \c i in <tt>[first, last)</tt> such that 
- * <tt>comp(*i, value)</tt> and <tt>comp(value, *i)</tt> are both \c false.
- *
- *  \param first The beginning of the ordered sequence.
- *  \param last The end of the ordered sequence.
- *  \param value The value to be searched.
- *  \param comp The comparison operator.
- *  \return \c true if an equivalent element exists in <tt>[first, last)</tt>, otherwise \c false.
- * 
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator">Forward Iterator</a>.
- *  \tparam T is comparable to \p ForwardIterator's \c value_type.
- *  \tparam StrictWeakOrdering is a model of <a href="http://www.sgi.com/tech/stl/StrictWeakOrdering.html">Strict Weak Ordering</a>.
- *
- *  The following code snippet demonstrates how to use \p binary_search
- *  to search for values in an ordered range.
- *
- *  \code
- *  #include <thrust/binary_search.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/functional.h>
- *  ...
- *  thrust::device_vector<int> input(5);
- *
- *  input[0] = 0;
- *  input[1] = 2;
- *  input[2] = 5;
- *  input[3] = 7;
- *  input[4] = 8;
- *
- *  thrust::binary_search(input.begin(), input.end(), 0, thrust::less<int>()); // returns true
- *  thrust::binary_search(input.begin(), input.end(), 1, thrust::less<int>()); // returns false
- *  thrust::binary_search(input.begin(), input.end(), 2, thrust::less<int>()); // returns true
- *  thrust::binary_search(input.begin(), input.end(), 3, thrust::less<int>()); // returns false
- *  thrust::binary_search(input.begin(), input.end(), 8, thrust::less<int>()); // returns true
- *  thrust::binary_search(input.begin(), input.end(), 9, thrust::less<int>()); // returns false
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/binary_search.html
- *  \see \p lower_bound
- *  \see \p upper_bound
- *  \see \p equal_range
- */
-template <class ForwardIterator, class T, class StrictWeakOrdering>
-bool binary_search(ForwardIterator first,
-                   ForwardIterator last,
-                   const T& value, 
-                   StrictWeakOrdering comp);
-
-
-/*! \p equal_range is a version of binary search: it attempts to find
- * the element value in an ordered range <tt>[first, last)</tt>. The 
- * value returned by \p equal_range is essentially a combination of
- * the values returned by \p lower_bound and \p upper_bound: it returns
- * a \p pair of iterators \c i and \c j such that \c i is the first
- * position where value could be inserted without violating the 
- * ordering and \c j is the last position where value could be inserted
- * without violating the ordering. It follows that every element in the
- * range <tt>[i, j)</tt> is equivalent to value, and that 
- * <tt>[i, j)</tt> is the largest subrange of <tt>[first, last)</tt> that
- * has this property. 
- *
- * This version of \p equal_range returns a \p pair of iterators 
- * <tt>[i, j)</tt>, where \c i is the furthermost iterator in 
- * <tt>[first, last)</tt> such that, for every iterator \c k in 
- * <tt>[first, i)</tt>, <tt>*k < value</tt>.  \c j is the furthermost
- * iterator in <tt>[first, last)</tt> such that, for every iterator 
- * \c k in <tt>[first, j)</tt>, <tt>value < *k</tt> is \c false. 
- * For every iterator \c k in <tt>[i, j)</tt>, neither 
- * <tt>value < *k</tt> nor <tt>*k < value</tt> is \c true.
- *
- * The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the ordered sequence.
- *  \param last The end of the ordered sequence.
- *  \param value The value to be searched.
- *  \return A \p pair of iterators <tt>[i, j)</tt> that define the range of equivalent elements.
- * 
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator">Forward Iterator</a>.
- *  \tparam LessThanComparable is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThanComparable</a>. 
- *
- *  The following code snippet demonstrates how to use \p equal_range
- *  to search for values in an ordered range using the \p thrust::device execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/binary_search.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  thrust::device_vector<int> input(5);
- *
- *  input[0] = 0;
- *  input[1] = 2;
- *  input[2] = 5;
- *  input[3] = 7;
- *  input[4] = 8;
- *
- *  thrust::equal_range(thrust::device, input.begin(), input.end(), 0); // returns [input.begin(), input.begin() + 1)
- *  thrust::equal_range(thrust::device, input.begin(), input.end(), 1); // returns [input.begin() + 1, input.begin() + 1)
- *  thrust::equal_range(thrust::device, input.begin(), input.end(), 2); // returns [input.begin() + 1, input.begin() + 2)
- *  thrust::equal_range(thrust::device, input.begin(), input.end(), 3); // returns [input.begin() + 2, input.begin() + 2)
- *  thrust::equal_range(thrust::device, input.begin(), input.end(), 8); // returns [input.begin() + 4, input.end())
- *  thrust::equal_range(thrust::device, input.begin(), input.end(), 9); // returns [input.end(), input.end())
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/equal_range.html
- *  \see \p lower_bound
- *  \see \p upper_bound
- *  \see \p binary_search
- */
-template <typename DerivedPolicy, typename ForwardIterator, typename LessThanComparable>
-thrust::pair<ForwardIterator, ForwardIterator>
-equal_range(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-            ForwardIterator first,
-            ForwardIterator last,
-            const LessThanComparable& value);
-
-
-/*! \p equal_range is a version of binary search: it attempts to find
- * the element value in an ordered range <tt>[first, last)</tt>. The 
- * value returned by \p equal_range is essentially a combination of
- * the values returned by \p lower_bound and \p upper_bound: it returns
- * a \p pair of iterators \c i and \c j such that \c i is the first
- * position where value could be inserted without violating the 
- * ordering and \c j is the last position where value could be inserted
- * without violating the ordering. It follows that every element in the
- * range <tt>[i, j)</tt> is equivalent to value, and that 
- * <tt>[i, j)</tt> is the largest subrange of <tt>[first, last)</tt> that
- * has this property. 
- *
- * This version of \p equal_range returns a \p pair of iterators 
- * <tt>[i, j)</tt>, where \c i is the furthermost iterator in 
- * <tt>[first, last)</tt> such that, for every iterator \c k in 
- * <tt>[first, i)</tt>, <tt>*k < value</tt>.  \c j is the furthermost
- * iterator in <tt>[first, last)</tt> such that, for every iterator 
- * \c k in <tt>[first, j)</tt>, <tt>value < *k</tt> is \c false. 
- * For every iterator \c k in <tt>[i, j)</tt>, neither 
- * <tt>value < *k</tt> nor <tt>*k < value</tt> is \c true.
- *
- *  \param first The beginning of the ordered sequence.
- *  \param last The end of the ordered sequence.
- *  \param value The value to be searched.
- *  \return A \p pair of iterators <tt>[i, j)</tt> that define the range of equivalent elements.
- * 
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator">Forward Iterator</a>.
- *  \tparam LessThanComparable is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThanComparable</a>. 
- *
- *  The following code snippet demonstrates how to use \p equal_range
- *  to search for values in an ordered range.
- *
- *  \code
- *  #include <thrust/binary_search.h>
- *  #include <thrust/device_vector.h>
- *  ...
- *  thrust::device_vector<int> input(5);
- *
- *  input[0] = 0;
- *  input[1] = 2;
- *  input[2] = 5;
- *  input[3] = 7;
- *  input[4] = 8;
- *
- *  thrust::equal_range(input.begin(), input.end(), 0); // returns [input.begin(), input.begin() + 1)
- *  thrust::equal_range(input.begin(), input.end(), 1); // returns [input.begin() + 1, input.begin() + 1)
- *  thrust::equal_range(input.begin(), input.end(), 2); // returns [input.begin() + 1, input.begin() + 2)
- *  thrust::equal_range(input.begin(), input.end(), 3); // returns [input.begin() + 2, input.begin() + 2)
- *  thrust::equal_range(input.begin(), input.end(), 8); // returns [input.begin() + 4, input.end())
- *  thrust::equal_range(input.begin(), input.end(), 9); // returns [input.end(), input.end())
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/equal_range.html
- *  \see \p lower_bound
- *  \see \p upper_bound
- *  \see \p binary_search
- */
-template <class ForwardIterator, class LessThanComparable>
-thrust::pair<ForwardIterator, ForwardIterator>
-equal_range(ForwardIterator first,
-            ForwardIterator last,
-            const LessThanComparable& value);
-
-
-/*! \p equal_range is a version of binary search: it attempts to find
- * the element value in an ordered range <tt>[first, last)</tt>. The 
- * value returned by \p equal_range is essentially a combination of
- * the values returned by \p lower_bound and \p upper_bound: it returns
- * a \p pair of iterators \c i and \c j such that \c i is the first
- * position where value could be inserted without violating the 
- * ordering and \c j is the last position where value could be inserted
- * without violating the ordering. It follows that every element in the
- * range <tt>[i, j)</tt> is equivalent to value, and that 
- * <tt>[i, j)</tt> is the largest subrange of <tt>[first, last)</tt> that
- * has this property. 
- *
- * This version of \p equal_range returns a \p pair of iterators 
- * <tt>[i, j)</tt>. \c i is the furthermost iterator in 
- * <tt>[first, last)</tt> such that, for every iterator \c k in 
- * <tt>[first, i)</tt>, <tt>comp(*k, value)</tt> is \c true.
- * \c j is the furthermost iterator in <tt>[first, last)</tt> such
- * that, for every iterator \c k in <tt>[first, j)</tt>, 
- * <tt>comp(value, *k)</tt> is \c false. For every iterator \c k 
- * in <tt>[i, j)</tt>, neither <tt>comp(value, *k)</tt> nor 
- * <tt>comp(*k, value)</tt> is \c true.
- *
- * The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the ordered sequence.
- *  \param last The end of the ordered sequence.
- *  \param value The value to be searched.
- *  \param comp The comparison operator.
- *  \return A \p pair of iterators <tt>[i, j)</tt> that define the range of equivalent elements.
- * 
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator">Forward Iterator</a>.
- *  \tparam T is comparable to \p ForwardIterator's \c value_type.
- *  \tparam StrictWeakOrdering is a model of <a href="http://www.sgi.com/tech/stl/StrictWeakOrdering.html">Strict Weak Ordering</a>.
- *
- *  The following code snippet demonstrates how to use \p equal_range
- *  to search for values in an ordered range using the \p thrust::device execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/binary_search.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  thrust::device_vector<int> input(5);
- *
- *  input[0] = 0;
- *  input[1] = 2;
- *  input[2] = 5;
- *  input[3] = 7;
- *  input[4] = 8;
- *
- *  thrust::equal_range(thrust::device, input.begin(), input.end(), 0, thrust::less<int>()); // returns [input.begin(), input.begin() + 1)
- *  thrust::equal_range(thrust::device, input.begin(), input.end(), 1, thrust::less<int>()); // returns [input.begin() + 1, input.begin() + 1)
- *  thrust::equal_range(thrust::device, input.begin(), input.end(), 2, thrust::less<int>()); // returns [input.begin() + 1, input.begin() + 2)
- *  thrust::equal_range(thrust::device, input.begin(), input.end(), 3, thrust::less<int>()); // returns [input.begin() + 2, input.begin() + 2)
- *  thrust::equal_range(thrust::device, input.begin(), input.end(), 8, thrust::less<int>()); // returns [input.begin() + 4, input.end())
- *  thrust::equal_range(thrust::device, input.begin(), input.end(), 9, thrust::less<int>()); // returns [input.end(), input.end())
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/equal_range.html
- *  \see \p lower_bound
- *  \see \p upper_bound
- *  \see \p binary_search
- */
-template <typename DerivedPolicy, typename ForwardIterator, typename T, typename StrictWeakOrdering>
-thrust::pair<ForwardIterator, ForwardIterator>
-equal_range(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-            ForwardIterator first,
-            ForwardIterator last,
-            const T& value,
-            StrictWeakOrdering comp);
-
-
-/*! \p equal_range is a version of binary search: it attempts to find
- * the element value in an ordered range <tt>[first, last)</tt>. The 
- * value returned by \p equal_range is essentially a combination of
- * the values returned by \p lower_bound and \p upper_bound: it returns
- * a \p pair of iterators \c i and \c j such that \c i is the first
- * position where value could be inserted without violating the 
- * ordering and \c j is the last position where value could be inserted
- * without violating the ordering. It follows that every element in the
- * range <tt>[i, j)</tt> is equivalent to value, and that 
- * <tt>[i, j)</tt> is the largest subrange of <tt>[first, last)</tt> that
- * has this property. 
- *
- * This version of \p equal_range returns a \p pair of iterators 
- * <tt>[i, j)</tt>. \c i is the furthermost iterator in 
- * <tt>[first, last)</tt> such that, for every iterator \c k in 
- * <tt>[first, i)</tt>, <tt>comp(*k, value)</tt> is \c true.
- * \c j is the furthermost iterator in <tt>[first, last)</tt> such
- * that, for every iterator \c k in <tt>[first, j)</tt>, 
- * <tt>comp(value, *k)</tt> is \c false. For every iterator \c k 
- * in <tt>[i, j)</tt>, neither <tt>comp(value, *k)</tt> nor 
- * <tt>comp(*k, value)</tt> is \c true.
- *
- *  \param first The beginning of the ordered sequence.
- *  \param last The end of the ordered sequence.
- *  \param value The value to be searched.
- *  \param comp The comparison operator.
- *  \return A \p pair of iterators <tt>[i, j)</tt> that define the range of equivalent elements.
- * 
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator">Forward Iterator</a>.
- *  \tparam T is comparable to \p ForwardIterator's \c value_type.
- *  \tparam StrictWeakOrdering is a model of <a href="http://www.sgi.com/tech/stl/StrictWeakOrdering.html">Strict Weak Ordering</a>.
- *
- *  The following code snippet demonstrates how to use \p equal_range
- *  to search for values in an ordered range.
- *
- *  \code
- *  #include <thrust/binary_search.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/functional.h>
- *  ...
- *  thrust::device_vector<int> input(5);
- *
- *  input[0] = 0;
- *  input[1] = 2;
- *  input[2] = 5;
- *  input[3] = 7;
- *  input[4] = 8;
- *
- *  thrust::equal_range(input.begin(), input.end(), 0, thrust::less<int>()); // returns [input.begin(), input.begin() + 1)
- *  thrust::equal_range(input.begin(), input.end(), 1, thrust::less<int>()); // returns [input.begin() + 1, input.begin() + 1)
- *  thrust::equal_range(input.begin(), input.end(), 2, thrust::less<int>()); // returns [input.begin() + 1, input.begin() + 2)
- *  thrust::equal_range(input.begin(), input.end(), 3, thrust::less<int>()); // returns [input.begin() + 2, input.begin() + 2)
- *  thrust::equal_range(input.begin(), input.end(), 8, thrust::less<int>()); // returns [input.begin() + 4, input.end())
- *  thrust::equal_range(input.begin(), input.end(), 9, thrust::less<int>()); // returns [input.end(), input.end())
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/equal_range.html
- *  \see \p lower_bound
- *  \see \p upper_bound
- *  \see \p binary_search
- */
-template <class ForwardIterator, class T, class StrictWeakOrdering>
-thrust::pair<ForwardIterator, ForwardIterator>
-equal_range(ForwardIterator first,
-            ForwardIterator last,
-            const T& value,
-            StrictWeakOrdering comp);
-
-
-/*! \addtogroup vectorized_binary_search Vectorized Searches
- *  \ingroup binary_search
- *  \{
- */
-
-
-//////////////////////
-// Vector Functions //
-//////////////////////
-
-
-/*! \p lower_bound is a vectorized version of binary search: for each 
- * iterator \c v in <tt>[values_first, values_last)</tt> it attempts to
- * find the value <tt>*v</tt> in an ordered range <tt>[first, last)</tt>.
- * Specifically, it returns the index of the first position where value could
- * be inserted without violating the ordering.
- *
- * The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the ordered sequence.
- *  \param last The end of the ordered sequence.
- *  \param values_first The beginning of the search values sequence.
- *  \param values_last The end of the search values sequence.
- *  \param result The beginning of the output sequence.
- * 
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator">Forward Iterator</a>.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>.
- *                        and \c InputIterator's \c value_type is <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThanComparable</a>.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *                        and \c ForwardIterator's difference_type is convertible to \c OutputIterator's \c value_type.
- *
- *  \pre The ranges <tt>[first,last)</tt> and <tt>[result, result + (values_last - values_first))</tt> shall not overlap.
- *
- *  The following code snippet demonstrates how to use \p lower_bound
- *  to search for multiple values in an ordered range using the \p thrust::device execution policy for
- *  parallelization:
- *
- *  \code
- *  #include <thrust/binary_search.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  thrust::device_vector<int> input(5);
- *
- *  input[0] = 0;
- *  input[1] = 2;
- *  input[2] = 5;
- *  input[3] = 7;
- *  input[4] = 8;
- *
- *  thrust::device_vector<int> values(6);
- *  values[0] = 0; 
- *  values[1] = 1;
- *  values[2] = 2;
- *  values[3] = 3;
- *  values[4] = 8;
- *  values[5] = 9;
- *
- *  thrust::device_vector<unsigned int> output(6);
- *
- *  thrust::lower_bound(thrust::device,
- *                      input.begin(), input.end(),
- *                      values.begin(), values.end(),
- *                      output.begin());
- *
- *  // output is now [0, 1, 1, 2, 4, 5]
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/lower_bound.html
- *  \see \p upper_bound
- *  \see \p equal_range
- *  \see \p binary_search
- */
-template <typename DerivedPolicy, typename ForwardIterator, typename InputIterator, typename OutputIterator>
-OutputIterator lower_bound(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                           ForwardIterator first, 
-                           ForwardIterator last,
-                           InputIterator values_first, 
-                           InputIterator values_last,
-                           OutputIterator result);
-
-
-/*! \p lower_bound is a vectorized version of binary search: for each 
- * iterator \c v in <tt>[values_first, values_last)</tt> it attempts to
- * find the value <tt>*v</tt> in an ordered range <tt>[first, last)</tt>.
- * Specifically, it returns the index of first position where value could
- * be inserted without violating the ordering.
- *
- *  \param first The beginning of the ordered sequence.
- *  \param last The end of the ordered sequence.
- *  \param values_first The beginning of the search values sequence.
- *  \param values_last The end of the search values sequence.
- *  \param result The beginning of the output sequence.
- * 
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator">Forward Iterator</a>.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>.
- *                        and \c InputIterator's \c value_type is <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThanComparable</a>.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *                        and \c ForwardIterator's difference_type is convertible to \c OutputIterator's \c value_type.
- *
- *  \pre The ranges <tt>[first,last)</tt> and <tt>[result, result + (values_last - values_first))</tt> shall not overlap.
- *
- *  The following code snippet demonstrates how to use \p lower_bound
- *  to search for multiple values in an ordered range.
- *
- *  \code
- *  #include <thrust/binary_search.h>
- *  #include <thrust/device_vector.h>
- *  ...
- *  thrust::device_vector<int> input(5);
- *
- *  input[0] = 0;
- *  input[1] = 2;
- *  input[2] = 5;
- *  input[3] = 7;
- *  input[4] = 8;
- *
- *  thrust::device_vector<int> values(6);
- *  values[0] = 0; 
- *  values[1] = 1;
- *  values[2] = 2;
- *  values[3] = 3;
- *  values[4] = 8;
- *  values[5] = 9;
- *
- *  thrust::device_vector<unsigned int> output(6);
- *
- *  thrust::lower_bound(input.begin(), input.end(),
- *                      values.begin(), values.end(),
- *                      output.begin());
- *
- *  // output is now [0, 1, 1, 2, 4, 5]
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/lower_bound.html
- *  \see \p upper_bound
- *  \see \p equal_range
- *  \see \p binary_search
- */
-template <class ForwardIterator, class InputIterator, class OutputIterator>
-OutputIterator lower_bound(ForwardIterator first, 
-                           ForwardIterator last,
-                           InputIterator values_first, 
-                           InputIterator values_last,
-                           OutputIterator result);
-
-
-/*! \p lower_bound is a vectorized version of binary search: for each 
- * iterator \c v in <tt>[values_first, values_last)</tt> it attempts to
- * find the value <tt>*v</tt> in an ordered range <tt>[first, last)</tt>.
- * Specifically, it returns the index of first position where value could
- * be inserted without violating the ordering.  This version of 
- * \p lower_bound uses function object \c comp for comparison.
- *
- * The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the ordered sequence.
- *  \param last The end of the ordered sequence.
- *  \param values_first The beginning of the search values sequence.
- *  \param values_last The end of the search values sequence.
- *  \param result The beginning of the output sequence.
- *  \param comp The comparison operator.
- * 
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator">Forward Iterator</a>.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>.
- *                        and \c InputIterator's \c value_type is comparable to \p ForwardIterator's \c value_type.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *                        and \c ForwardIterator's difference_type is convertible to \c OutputIterator's \c value_type.
- *  \tparam StrictWeakOrdering is a model of <a href="http://www.sgi.com/tech/stl/StrictWeakOrdering.html">Strict Weak Ordering</a>.
- *
- *  \pre The ranges <tt>[first,last)</tt> and <tt>[result, result + (values_last - values_first))</tt> shall not overlap.
- *
- *  The following code snippet demonstrates how to use \p lower_bound
- *  to search for multiple values in an ordered range.
- *
- *  \code
- *  #include <thrust/binary_search.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  thrust::device_vector<int> input(5);
- *
- *  input[0] = 0;
- *  input[1] = 2;
- *  input[2] = 5;
- *  input[3] = 7;
- *  input[4] = 8;
- *
- *  thrust::device_vector<int> values(6);
- *  values[0] = 0; 
- *  values[1] = 1;
- *  values[2] = 2;
- *  values[3] = 3;
- *  values[4] = 8;
- *  values[5] = 9;
- *
- *  thrust::device_vector<unsigned int> output(6);
- *
- *  thrust::lower_bound(thrust::device,
- *                      input.begin(), input.end(),
- *                      values.begin(), values.end(),
- *                      output.begin(), thrust::less<int>());
- *
- *  // output is now [0, 1, 1, 2, 4, 5]
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/lower_bound.html
- *  \see \p upper_bound
- *  \see \p equal_range
- *  \see \p binary_search
- */
-template <typename DerivedPolicy, typename ForwardIterator, typename InputIterator, typename OutputIterator, typename StrictWeakOrdering>
-OutputIterator lower_bound(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                           ForwardIterator first, 
-                           ForwardIterator last,
-                           InputIterator values_first, 
-                           InputIterator values_last,
-                           OutputIterator result,
-                           StrictWeakOrdering comp);
-
-
-/*! \p lower_bound is a vectorized version of binary search: for each 
- * iterator \c v in <tt>[values_first, values_last)</tt> it attempts to
- * find the value <tt>*v</tt> in an ordered range <tt>[first, last)</tt>.
- * Specifically, it returns the index of first position where value could
- * be inserted without violating the ordering.  This version of 
- * \p lower_bound uses function object \c comp for comparison.
- *
- *  \param first The beginning of the ordered sequence.
- *  \param last The end of the ordered sequence.
- *  \param values_first The beginning of the search values sequence.
- *  \param values_last The end of the search values sequence.
- *  \param result The beginning of the output sequence.
- *  \param comp The comparison operator.
- * 
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator">Forward Iterator</a>.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>.
- *                        and \c InputIterator's \c value_type is comparable to \p ForwardIterator's \c value_type.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *                        and \c ForwardIterator's difference_type is convertible to \c OutputIterator's \c value_type.
- *  \tparam StrictWeakOrdering is a model of <a href="http://www.sgi.com/tech/stl/StrictWeakOrdering.html">Strict Weak Ordering</a>.
- *
- *  \pre The ranges <tt>[first,last)</tt> and <tt>[result, result + (values_last - values_first))</tt> shall not overlap.
- *
- *  The following code snippet demonstrates how to use \p lower_bound
- *  to search for multiple values in an ordered range.
- *
- *  \code
- *  #include <thrust/binary_search.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/functional.h>
- *  ...
- *  thrust::device_vector<int> input(5);
- *
- *  input[0] = 0;
- *  input[1] = 2;
- *  input[2] = 5;
- *  input[3] = 7;
- *  input[4] = 8;
- *
- *  thrust::device_vector<int> values(6);
- *  values[0] = 0; 
- *  values[1] = 1;
- *  values[2] = 2;
- *  values[3] = 3;
- *  values[4] = 8;
- *  values[5] = 9;
- *
- *  thrust::device_vector<unsigned int> output(6);
- *
- *  thrust::lower_bound(input.begin(), input.end(),
- *                      values.begin(), values.end(), 
- *                      output.begin(),
- *                      thrust::less<int>());
- *
- *  // output is now [0, 1, 1, 2, 4, 5]
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/lower_bound.html
- *  \see \p upper_bound
- *  \see \p equal_range
- *  \see \p binary_search
- */
-template <class ForwardIterator, class InputIterator, class OutputIterator, class StrictWeakOrdering>
-OutputIterator lower_bound(ForwardIterator first, 
-                           ForwardIterator last,
-                           InputIterator values_first, 
-                           InputIterator values_last,
-                           OutputIterator result,
-                           StrictWeakOrdering comp);
-
-
-/*! \p upper_bound is a vectorized version of binary search: for each 
- * iterator \c v in <tt>[values_first, values_last)</tt> it attempts to
- * find the value <tt>*v</tt> in an ordered range <tt>[first, last)</tt>.
- * Specifically, it returns the index of last position where value could
- * be inserted without violating the ordering.
- *
- * The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the ordered sequence.
- *  \param last The end of the ordered sequence.
- *  \param values_first The beginning of the search values sequence.
- *  \param values_last The end of the search values sequence.
- *  \param result The beginning of the output sequence.
- * 
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator">Forward Iterator</a>.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>.
- *                        and \c InputIterator's \c value_type is <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThanComparable</a>.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *                        and \c ForwardIterator's difference_type is convertible to \c OutputIterator's \c value_type.
- *
- *  \pre The ranges <tt>[first,last)</tt> and <tt>[result, result + (values_last - values_first))</tt> shall not overlap.
- *
- *  The following code snippet demonstrates how to use \p upper_bound
- *  to search for multiple values in an ordered range using the \p thrust::device execution policy for
- *  parallelization:
- *
- *  \code
- *  #include <thrust/binary_search.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  thrust::device_vector<int> input(5);
- *
- *  input[0] = 0;
- *  input[1] = 2;
- *  input[2] = 5;
- *  input[3] = 7;
- *  input[4] = 8;
- *
- *  thrust::device_vector<int> values(6);
- *  values[0] = 0; 
- *  values[1] = 1;
- *  values[2] = 2;
- *  values[3] = 3;
- *  values[4] = 8;
- *  values[5] = 9;
- *
- *  thrust::device_vector<unsigned int> output(6);
- *
- *  thrust::upper_bound(thrust::device,
- *                      input.begin(), input.end(),
- *                      values.begin(), values.end(),
- *                      output.begin());
- *
- *  // output is now [1, 1, 2, 2, 5, 5]
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/upper_bound.html
- *  \see \p lower_bound
- *  \see \p equal_range
- *  \see \p binary_search
- */
-template <typename DerivedPolicy, typename ForwardIterator, typename InputIterator, typename OutputIterator>
-OutputIterator upper_bound(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                           ForwardIterator first, 
-                           ForwardIterator last,
-                           InputIterator values_first, 
-                           InputIterator values_last,
-                           OutputIterator result);
-
-
-/*! \p upper_bound is a vectorized version of binary search: for each 
- * iterator \c v in <tt>[values_first, values_last)</tt> it attempts to
- * find the value <tt>*v</tt> in an ordered range <tt>[first, last)</tt>.
- * Specifically, it returns the index of last position where value could
- * be inserted without violating the ordering.
- *
- *  \param first The beginning of the ordered sequence.
- *  \param last The end of the ordered sequence.
- *  \param values_first The beginning of the search values sequence.
- *  \param values_last The end of the search values sequence.
- *  \param result The beginning of the output sequence.
- * 
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator">Forward Iterator</a>.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>.
- *                        and \c InputIterator's \c value_type is <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThanComparable</a>.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *                        and \c ForwardIterator's difference_type is convertible to \c OutputIterator's \c value_type.
- *
- *  \pre The ranges <tt>[first,last)</tt> and <tt>[result, result + (values_last - values_first))</tt> shall not overlap.
- *
- *  The following code snippet demonstrates how to use \p upper_bound
- *  to search for multiple values in an ordered range.
- *
- *  \code
- *  #include <thrust/binary_search.h>
- *  #include <thrust/device_vector.h>
- *  ...
- *  thrust::device_vector<int> input(5);
- *
- *  input[0] = 0;
- *  input[1] = 2;
- *  input[2] = 5;
- *  input[3] = 7;
- *  input[4] = 8;
- *
- *  thrust::device_vector<int> values(6);
- *  values[0] = 0; 
- *  values[1] = 1;
- *  values[2] = 2;
- *  values[3] = 3;
- *  values[4] = 8;
- *  values[5] = 9;
- *
- *  thrust::device_vector<unsigned int> output(6);
- *
- *  thrust::upper_bound(input.begin(), input.end(),
- *                      values.begin(), values.end(),
- *                      output.begin());
- *
- *  // output is now [1, 1, 2, 2, 5, 5]
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/upper_bound.html
- *  \see \p lower_bound
- *  \see \p equal_range
- *  \see \p binary_search
- */
-template <class ForwardIterator, class InputIterator, class OutputIterator>
-OutputIterator upper_bound(ForwardIterator first, 
-                           ForwardIterator last,
-                           InputIterator values_first, 
-                           InputIterator values_last,
-                           OutputIterator result);
-
-
-/*! \p upper_bound is a vectorized version of binary search: for each 
- * iterator \c v in <tt>[values_first, values_last)</tt> it attempts to
- * find the value <tt>*v</tt> in an ordered range <tt>[first, last)</tt>.
- * Specifically, it returns the index of last position where value could
- * be inserted without violating the ordering.  This version of 
- * \p upper_bound uses function object \c comp for comparison.
- *
- * The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the ordered sequence.
- *  \param last The end of the ordered sequence.
- *  \param values_first The beginning of the search values sequence.
- *  \param values_last The end of the search values sequence.
- *  \param result The beginning of the output sequence.
- *  \param comp The comparison operator.
- * 
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator">Forward Iterator</a>.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>.
- *                        and \c InputIterator's \c value_type is comparable to \p ForwardIterator's \c value_type.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *                        and \c ForwardIterator's difference_type is convertible to \c OutputIterator's \c value_type.
- *  \tparam StrictWeakOrdering is a model of <a href="http://www.sgi.com/tech/stl/StrictWeakOrdering.html">Strict Weak Ordering</a>.
- *
- *  \pre The ranges <tt>[first,last)</tt> and <tt>[result, result + (values_last - values_first))</tt> shall not overlap.
- *
- *  The following code snippet demonstrates how to use \p upper_bound
- *  to search for multiple values in an ordered range using the \p thrust::device execution policy for
- *  parallelization:
- *
- *  \code
- *  #include <thrust/binary_search.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  thrust::device_vector<int> input(5);
- *
- *  input[0] = 0;
- *  input[1] = 2;
- *  input[2] = 5;
- *  input[3] = 7;
- *  input[4] = 8;
- *
- *  thrust::device_vector<int> values(6);
- *  values[0] = 0; 
- *  values[1] = 1;
- *  values[2] = 2;
- *  values[3] = 3;
- *  values[4] = 8;
- *  values[5] = 9;
- *
- *  thrust::device_vector<unsigned int> output(6);
- *
- *  thrust::upper_bound(thrust::device,
- *                      input.begin(), input.end(),
- *                      values.begin(), values.end(), 
- *                      output.begin(),
- *                      thrust::less<int>());
- *
- *  // output is now [1, 1, 2, 2, 5, 5]
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/upper_bound.html
- *  \see \p lower_bound
- *  \see \p equal_range
- *  \see \p binary_search
- */
-template <typename DerivedPolicy, typename ForwardIterator, typename InputIterator, typename OutputIterator, typename StrictWeakOrdering>
-OutputIterator upper_bound(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                           ForwardIterator first, 
-                           ForwardIterator last,
-                           InputIterator values_first, 
-                           InputIterator values_last,
-                           OutputIterator result,
-                           StrictWeakOrdering comp);
-
-
-/*! \p upper_bound is a vectorized version of binary search: for each 
- * iterator \c v in <tt>[values_first, values_last)</tt> it attempts to
- * find the value <tt>*v</tt> in an ordered range <tt>[first, last)</tt>.
- * Specifically, it returns the index of last position where value could
- * be inserted without violating the ordering.  This version of 
- * \p upper_bound uses function object \c comp for comparison.
- *
- *  \param first The beginning of the ordered sequence.
- *  \param last The end of the ordered sequence.
- *  \param values_first The beginning of the search values sequence.
- *  \param values_last The end of the search values sequence.
- *  \param result The beginning of the output sequence.
- *  \param comp The comparison operator.
- * 
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator">Forward Iterator</a>.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>.
- *                        and \c InputIterator's \c value_type is comparable to \p ForwardIterator's \c value_type.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *                        and \c ForwardIterator's difference_type is convertible to \c OutputIterator's \c value_type.
- *  \tparam StrictWeakOrdering is a model of <a href="http://www.sgi.com/tech/stl/StrictWeakOrdering.html">Strict Weak Ordering</a>.
- *
- *  \pre The ranges <tt>[first,last)</tt> and <tt>[result, result + (values_last - values_first))</tt> shall not overlap.
- *
- *  The following code snippet demonstrates how to use \p upper_bound
- *  to search for multiple values in an ordered range.
- *
- *  \code
- *  #include <thrust/binary_search.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/functional.h>
- *  ...
- *  thrust::device_vector<int> input(5);
- *
- *  input[0] = 0;
- *  input[1] = 2;
- *  input[2] = 5;
- *  input[3] = 7;
- *  input[4] = 8;
- *
- *  thrust::device_vector<int> values(6);
- *  values[0] = 0; 
- *  values[1] = 1;
- *  values[2] = 2;
- *  values[3] = 3;
- *  values[4] = 8;
- *  values[5] = 9;
- *
- *  thrust::device_vector<unsigned int> output(6);
- *
- *  thrust::upper_bound(input.begin(), input.end(),
- *                      values.begin(), values.end(), 
- *                      output.begin(),
- *                      thrust::less<int>());
- *
- *  // output is now [1, 1, 2, 2, 5, 5]
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/upper_bound.html
- *  \see \p lower_bound
- *  \see \p equal_range
- *  \see \p binary_search
- */
-template <class ForwardIterator, class InputIterator, class OutputIterator, class StrictWeakOrdering>
-OutputIterator upper_bound(ForwardIterator first, 
-                           ForwardIterator last,
-                           InputIterator values_first, 
-                           InputIterator values_last,
-                           OutputIterator result,
-                           StrictWeakOrdering comp);
-
-
-/*! \p binary_search is a vectorized version of binary search: for each 
- * iterator \c v in <tt>[values_first, values_last)</tt> it attempts to
- * find the value <tt>*v</tt> in an ordered range <tt>[first, last)</tt>.
- * It returns \c true if an element that is equivalent to <tt>*v</tt>
- * is present in <tt>[first, last)</tt> and \c false if no such element
- * exists.
- *
- * The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the ordered sequence.
- *  \param last The end of the ordered sequence.
- *  \param values_first The beginning of the search values sequence.
- *  \param values_last The end of the search values sequence.
- *  \param result The beginning of the output sequence.
- * 
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator">Forward Iterator</a>.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>.
- *                        and \c InputIterator's \c value_type is <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThanComparable</a>.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *                        and bool is convertible to \c OutputIterator's \c value_type.
- *
- *  \pre The ranges <tt>[first,last)</tt> and <tt>[result, result + (values_last - values_first))</tt> shall not overlap.
- *
- *  The following code snippet demonstrates how to use \p binary_search
- *  to search for multiple values in an ordered range using the \p thrust::device execution policy for
- *  parallelization:
- *
- *  \code
- *  #include <thrust/binary_search.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  thrust::device_vector<int> input(5);
- *
- *  input[0] = 0;
- *  input[1] = 2;
- *  input[2] = 5;
- *  input[3] = 7;
- *  input[4] = 8;
- *
- *  thrust::device_vector<int> values(6);
- *  values[0] = 0; 
- *  values[1] = 1;
- *  values[2] = 2;
- *  values[3] = 3;
- *  values[4] = 8;
- *  values[5] = 9;
- *
- *  thrust::device_vector<bool> output(6);
- *
- *  thrust::binary_search(thrust::device,
- *                        input.begin(), input.end(),
- *                        values.begin(), values.end(),
- *                        output.begin());
- *
- *  // output is now [true, false, true, false, true, false]
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/binary_search.html
- *  \see \p lower_bound
- *  \see \p upper_bound
- *  \see \p equal_range
- */
-template <typename DerivedPolicy, typename ForwardIterator, typename InputIterator, typename OutputIterator>
-OutputIterator binary_search(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                             ForwardIterator first, 
-                             ForwardIterator last,
-                             InputIterator values_first, 
-                             InputIterator values_last,
-                             OutputIterator result);
-
-
-/*! \p binary_search is a vectorized version of binary search: for each 
- * iterator \c v in <tt>[values_first, values_last)</tt> it attempts to
- * find the value <tt>*v</tt> in an ordered range <tt>[first, last)</tt>.
- * It returns \c true if an element that is equivalent to <tt>*v</tt>
- * is present in <tt>[first, last)</tt> and \c false if no such element
- * exists.
- *
- *  \param first The beginning of the ordered sequence.
- *  \param last The end of the ordered sequence.
- *  \param values_first The beginning of the search values sequence.
- *  \param values_last The end of the search values sequence.
- *  \param result The beginning of the output sequence.
- * 
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator">Forward Iterator</a>.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>.
- *                        and \c InputIterator's \c value_type is <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThanComparable</a>.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *                        and bool is convertible to \c OutputIterator's \c value_type.
- *
- *  \pre The ranges <tt>[first,last)</tt> and <tt>[result, result + (values_last - values_first))</tt> shall not overlap.
- *
- *  The following code snippet demonstrates how to use \p binary_search
- *  to search for multiple values in an ordered range.
- *
- *  \code
- *  #include <thrust/binary_search.h>
- *  #include <thrust/device_vector.h>
- *  ...
- *  thrust::device_vector<int> input(5);
- *
- *  input[0] = 0;
- *  input[1] = 2;
- *  input[2] = 5;
- *  input[3] = 7;
- *  input[4] = 8;
- *
- *  thrust::device_vector<int> values(6);
- *  values[0] = 0; 
- *  values[1] = 1;
- *  values[2] = 2;
- *  values[3] = 3;
- *  values[4] = 8;
- *  values[5] = 9;
- *
- *  thrust::device_vector<bool> output(6);
- *
- *  thrust::binary_search(input.begin(), input.end(),
- *                        values.begin(), values.end(),
- *                        output.begin());
- *
- *  // output is now [true, false, true, false, true, false]
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/binary_search.html
- *  \see \p lower_bound
- *  \see \p upper_bound
- *  \see \p equal_range
- */
-template <class ForwardIterator, class InputIterator, class OutputIterator>
-OutputIterator binary_search(ForwardIterator first, 
-                             ForwardIterator last,
-                             InputIterator values_first, 
-                             InputIterator values_last,
-                             OutputIterator result);
-
-
-/*! \p binary_search is a vectorized version of binary search: for each 
- * iterator \c v in <tt>[values_first, values_last)</tt> it attempts to
- * find the value <tt>*v</tt> in an ordered range <tt>[first, last)</tt>.
- * It returns \c true if an element that is equivalent to <tt>*v</tt>
- * is present in <tt>[first, last)</tt> and \c false if no such element
- * exists.  This version of \p binary_search uses function object 
- * \c comp for comparison.
- *
- * The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the ordered sequence.
- *  \param last The end of the ordered sequence.
- *  \param values_first The beginning of the search values sequence.
- *  \param values_last The end of the search values sequence.
- *  \param result The beginning of the output sequence.
- *  \param comp The comparison operator.
- * 
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator">Forward Iterator</a>.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>.
- *                        and \c InputIterator's \c value_type is <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThanComparable</a>.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *                        and bool is convertible to \c OutputIterator's \c value_type.
- *  \tparam StrictWeakOrdering is a model of <a href="http://www.sgi.com/tech/stl/StrictWeakOrdering.html">Strict Weak Ordering</a>.
- *
- *  \pre The ranges <tt>[first,last)</tt> and <tt>[result, result + (values_last - values_first))</tt> shall not overlap.
- *
- *  The following code snippet demonstrates how to use \p binary_search
- *  to search for multiple values in an ordered range using the \p thrust::device execution policy for
- *  parallelization:
- *
- *  \code
- *  #include <thrust/binary_search.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  thrust::device_vector<int> input(5);
- *
- *  input[0] = 0;
- *  input[1] = 2;
- *  input[2] = 5;
- *  input[3] = 7;
- *  input[4] = 8;
- *
- *  thrust::device_vector<int> values(6);
- *  values[0] = 0; 
- *  values[1] = 1;
- *  values[2] = 2;
- *  values[3] = 3;
- *  values[4] = 8;
- *  values[5] = 9;
- *
- *  thrust::device_vector<bool> output(6);
- *
- *  thrust::binary_search(thrust::device,
- *                        input.begin(), input.end(),
- *                        values.begin(), values.end(),
- *                        output.begin(),
- *                        thrust::less<int>());
- *
- *  // output is now [true, false, true, false, true, false]
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/binary_search.html
- *  \see \p lower_bound
- *  \see \p upper_bound
- *  \see \p equal_range
- */
-template <typename DerivedPolicy, typename ForwardIterator, typename InputIterator, typename OutputIterator, typename StrictWeakOrdering>
-OutputIterator binary_search(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                             ForwardIterator first, 
-                             ForwardIterator last,
-                             InputIterator values_first, 
-                             InputIterator values_last,
-                             OutputIterator result,
-                             StrictWeakOrdering comp);
-
-
-/*! \p binary_search is a vectorized version of binary search: for each 
- * iterator \c v in <tt>[values_first, values_last)</tt> it attempts to
- * find the value <tt>*v</tt> in an ordered range <tt>[first, last)</tt>.
- * It returns \c true if an element that is equivalent to <tt>*v</tt>
- * is present in <tt>[first, last)</tt> and \c false if no such element
- * exists.  This version of \p binary_search uses function object 
- * \c comp for comparison.
- *
- *  \param first The beginning of the ordered sequence.
- *  \param last The end of the ordered sequence.
- *  \param values_first The beginning of the search values sequence.
- *  \param values_last The end of the search values sequence.
- *  \param result The beginning of the output sequence.
- *  \param comp The comparison operator.
- * 
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator">Forward Iterator</a>.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>.
- *                        and \c InputIterator's \c value_type is <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThanComparable</a>.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *                        and bool is convertible to \c OutputIterator's \c value_type.
- *  \tparam StrictWeakOrdering is a model of <a href="http://www.sgi.com/tech/stl/StrictWeakOrdering.html">Strict Weak Ordering</a>.
- *
- *  \pre The ranges <tt>[first,last)</tt> and <tt>[result, result + (values_last - values_first))</tt> shall not overlap.
- *
- *  The following code snippet demonstrates how to use \p binary_search
- *  to search for multiple values in an ordered range.
- *
- *  \code
- *  #include <thrust/binary_search.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/functional.h>
- *  ...
- *  thrust::device_vector<int> input(5);
- *
- *  input[0] = 0;
- *  input[1] = 2;
- *  input[2] = 5;
- *  input[3] = 7;
- *  input[4] = 8;
- *
- *  thrust::device_vector<int> values(6);
- *  values[0] = 0; 
- *  values[1] = 1;
- *  values[2] = 2;
- *  values[3] = 3;
- *  values[4] = 8;
- *  values[5] = 9;
- *
- *  thrust::device_vector<bool> output(6);
- *
- *  thrust::binary_search(input.begin(), input.end(),
- *                        values.begin(), values.end(),
- *                        output.begin(),
- *                        thrust::less<int>());
- *
- *  // output is now [true, false, true, false, true, false]
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/binary_search.html
- *  \see \p lower_bound
- *  \see \p upper_bound
- *  \see \p equal_range
- */
-template <class ForwardIterator, class InputIterator, class OutputIterator, class StrictWeakOrdering>
-OutputIterator binary_search(ForwardIterator first, 
-                             ForwardIterator last,
-                             InputIterator values_first, 
-                             InputIterator values_last,
-                             OutputIterator result,
-                             StrictWeakOrdering comp);
-
-
-/*! \} // end vectorized_binary_search
- */
-
-
-/*! \} // end binary_search
- */
-
-
-/*! \} // end searching
- */
-
-
-} // end namespace thrust
-
-#include <thrust/detail/binary_search.inl>
-
diff --git a/compat/thrust/copy.h b/compat/thrust/copy.h
deleted file mode 100644
index eaa9719459..0000000000
--- a/compat/thrust/copy.h
+++ /dev/null
@@ -1,505 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file copy.h
- *  \brief Copies elements from one range to another
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/execution_policy.h>
-
-namespace thrust
-{
-
-/*! \addtogroup algorithms
- */
-
-/*! \addtogroup copying
- *  \ingroup algorithms
- *  \{
- */
-
-
-/*! \p copy copies elements from the range [\p first, \p last) to the range
- *  [\p result, \p result + (\p last - \p first)). That is, it performs
- *  the assignments *\p result = *\p first, *(\p result + \c 1) = *(\p first + \c 1),
- *  and so on. Generally, for every integer \c n from \c 0 to \p last - \p first, \p copy
- *  performs the assignment *(\p result + \c n) = *(\p first + \c n). Unlike
- *  \c std::copy, \p copy offers no guarantee on order of operation.  As a result,
- *  calling \p copy with overlapping source and destination ranges has undefined
- *  behavior.
- *
- *  The return value is \p result + (\p last - \p first).
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the sequence to copy.
- *  \param last The end of the sequence to copy.
- *  \param result The destination sequence.
- *  \return The end of the destination sequence.
- *  \see http://www.sgi.com/tech/stl/copy.html
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator must be a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a> and \c InputIterator's \c value_type must be convertible to \c OutputIterator's \c value_type.
- *  \tparam OutputIterator must be a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *
- *  \pre \p result may be equal to \p first, but \p result shall not be in the range <tt>[first, last)</tt> otherwise.
- *
- *  The following code snippet demonstrates how to use \p copy
- *  to copy from one range to another using the \p thrust::device parallelization policy:
- *
- *  \code
- *  #include <thrust/copy.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *
- *  thrust::device_vector<int> vec0(100);
- *  thrust::device_vector<int> vec1(100);
- *  ...
- *
- *  thrust::copy(thrust::device, vec0.begin(), vec0.end(), vec1.begin());
- *
- *  // vec1 is now a copy of vec0
- *  \endcode
- */
-template<typename DerivedPolicy, typename InputIterator, typename OutputIterator>
-  OutputIterator copy(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                      InputIterator first,
-                      InputIterator last,
-                      OutputIterator result);
-
-
-/*! \p copy_n copies elements from the range <tt>[first, first + n)</tt> to the range
- *  <tt>[result, result + n)</tt>. That is, it performs the assignments <tt>*result = *first, *(result + 1) = *(first + 1)</tt>,
- *  and so on. Generally, for every integer \c i from \c 0 to \c n, \p copy_n
- *  performs the assignment *(\p result + \c i) = *(\p first + \c i). Unlike
- *  \c std::copy_n, \p copy_n offers no guarantee on order of operation. As a result,
- *  calling \p copy_n with overlapping source and destination ranges has undefined
- *  behavior.
- *
- *  The return value is \p result + \p n.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the range to copy.
- *  \param n The number of elements to copy.
- *  \param result The beginning destination range.
- *  \return The end of the destination range.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator must be a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a> and \c InputIterator's \c value_type must be convertible to \c OutputIterator's \c value_type.
- *  \tparam Size is an integral type.
- *  \tparam OutputIterator must be a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *
- *  \pre \p result may be equal to \p first, but \p result shall not be in the range <tt>[first, first + n)</tt> otherwise.
- *
- *  The following code snippet demonstrates how to use \p copy_n
- *  to copy from one range to another using the \p thrust::device parallelization policy:
- *
- *  \code
- *  #include <thrust/copy.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  size_t n = 100;
- *  thrust::device_vector<int> vec0(n);
- *  thrust::device_vector<int> vec1(n);
- *  ...
- *  thrust::copy_n(thrust::device, vec0.begin(), n, vec1.begin());
- *
- *  // vec1 is now a copy of vec0
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/copy_n.html
- *  \see thrust::copy
- */
-template<typename DerivedPolicy, typename InputIterator, typename Size, typename OutputIterator>
-  OutputIterator copy_n(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                        InputIterator first,
-                        Size n,
-                        OutputIterator result);
-
-
-	
-/*! \p copy copies elements from the range [\p first, \p last) to the range
- *  [\p result, \p result + (\p last - \p first)). That is, it performs
- *  the assignments *\p result = *\p first, *(\p result + \c 1) = *(\p first + \c 1),
- *  and so on. Generally, for every integer \c n from \c 0 to \p last - \p first, \p copy
- *  performs the assignment *(\p result + \c n) = *(\p first + \c n). Unlike
- *  \c std::copy, \p copy offers no guarantee on order of operation.  As a result,
- *  calling \p copy with overlapping source and destination ranges has undefined
- *  behavior.
- *
- *  The return value is \p result + (\p last - \p first).
- *
- *  \param first The beginning of the sequence to copy.
- *  \param last The end of the sequence to copy.
- *  \param result The destination sequence.
- *  \return The end of the destination sequence.
- *  \see http://www.sgi.com/tech/stl/copy.html
- *
- *  \tparam InputIterator must be a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a> and \c InputIterator's \c value_type must be convertible to \c OutputIterator's \c value_type.
- *  \tparam OutputIterator must be a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *
- *  \pre \p result may be equal to \p first, but \p result shall not be in the range <tt>[first, last)</tt> otherwise.
- *
- *  The following code snippet demonstrates how to use \p copy
- *  to copy from one range to another.
- *
- *  \code
- *  #include <thrust/copy.h>
- *  #include <thrust/device_vector.h>
- *  ...
- *
- *  thrust::device_vector<int> vec0(100);
- *  thrust::device_vector<int> vec1(100);
- *  ...
- *
- *  thrust::copy(vec0.begin(), vec0.end(),
- *               vec1.begin());
- *
- *  // vec1 is now a copy of vec0
- *  \endcode
- */
-template<typename InputIterator, typename OutputIterator>
-  OutputIterator copy(InputIterator first,
-                      InputIterator last,
-                      OutputIterator result);
-
-/*! \p copy_n copies elements from the range <tt>[first, first + n)</tt> to the range
- *  <tt>[result, result + n)</tt>. That is, it performs the assignments <tt>*result = *first, *(result + 1) = *(first + 1)</tt>,
- *  and so on. Generally, for every integer \c i from \c 0 to \c n, \p copy_n
- *  performs the assignment *(\p result + \c i) = *(\p first + \c i). Unlike
- *  \c std::copy_n, \p copy_n offers no guarantee on order of operation. As a result,
- *  calling \p copy_n with overlapping source and destination ranges has undefined
- *  behavior.
- *
- *  The return value is \p result + \p n.
- *
- *  \param first The beginning of the range to copy.
- *  \param n The number of elements to copy.
- *  \param result The beginning destination range.
- *  \return The end of the destination range.
- *
- *  \tparam InputIterator must be a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a> and \c InputIterator's \c value_type must be convertible to \c OutputIterator's \c value_type.
- *  \tparam Size is an integral type.
- *  \tparam OutputIterator must be a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *
- *  \pre \p result may be equal to \p first, but \p result shall not be in the range <tt>[first, first + n)</tt> otherwise.
- *
- *  The following code snippet demonstrates how to use \p copy_n
- *  to copy from one range to another.
- *
- *  \code
- *  #include <thrust/copy.h>
- *  #include <thrust/device_vector.h>
- *  ...
- *  size_t n = 100;
- *  thrust::device_vector<int> vec0(n);
- *  thrust::device_vector<int> vec1(n);
- *  ...
- *  thrust::copy_n(vec0.begin(), n, vec1.begin());
- *
- *  // vec1 is now a copy of vec0
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/copy_n.html
- *  \see thrust::copy
- */
-template<typename InputIterator, typename Size, typename OutputIterator>
-  OutputIterator copy_n(InputIterator first,
-                        Size n,
-                        OutputIterator result);
-
-/*! \} // end copying
- */
-
-/*! \addtogroup stream_compaction
- *  \{
- */
-
-
-/*! This version of \p copy_if copies elements from the range <tt>[first,last)</tt>
- *  to a range beginning at \p result, except that any element which causes \p pred
- *  to be \c false is not copied.
- *
- *  More precisely, for every integer \c n such that <tt>0 <= n < last-first</tt>,
- *  \p copy_if performs the assignment <tt>*result = *(first+n)</tt> and \p result
- *  is advanced one position if <tt>pred(*(first+n))</tt>. Otherwise, no assignment
- *  occurs and \p result is not advanced.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the sequence from which to copy.
- *  \param last The end of the sequence from which to copy.
- *  \param result The beginning of the sequence into which to copy.
- *  \param pred The predicate to test on every value of the range <tt>[first, last)</tt>.
- *  \return <tt>result + n</tt>, where \c n is equal to the number of times \p pred
- *          evaluated to \c true in the range <tt>[first, last)</tt>.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *                        and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *
- *  \pre The ranges <tt>[first, last)</tt> and <tt>[result, result + (last - first))</tt> shall not overlap.
- *
- *  The following code snippet demonstrates how to use \p copy_if to perform stream compaction
- *  to copy even numbers to an output range using the \p thrust::host parallelization policy:
- *
- *  \code
- *  #include <thrust/copy.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  struct is_even
- *  {
- *    __host__ __device__
- *    bool operator()(const int x)
- *    {
- *      return (x % 2) == 0;
- *    }
- *  };
- *  ...
- *  const int N = 6;
- *  int V[N] = {-2, 0, -1, 0, 1, 2};
- *  int result[4];
- *
- *  thrust::copy_if(thrust::host, V, V + N, result, is_even());
- *
- *  // V remains {-2, 0, -1, 0, 1, 2}
- *  // result is now {-2, 0, 0, 2}
- *  \endcode
- *
- *  \see \c remove_copy_if
- */
-template<typename DerivedPolicy, typename InputIterator, typename OutputIterator, typename Predicate>
-  OutputIterator copy_if(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                         InputIterator first,
-                         InputIterator last,
-                         OutputIterator result,
-                         Predicate pred);
-
-
-
-/*! This version of \p copy_if copies elements from the range <tt>[first,last)</tt>
- *  to a range beginning at \p result, except that any element which causes \p pred
- *  to be \c false is not copied.
- *
- *  More precisely, for every integer \c n such that <tt>0 <= n < last-first</tt>,
- *  \p copy_if performs the assignment <tt>*result = *(first+n)</tt> and \p result
- *  is advanced one position if <tt>pred(*(first+n))</tt>. Otherwise, no assignment
- *  occurs and \p result is not advanced.
- *
- *  \param first The beginning of the sequence from which to copy.
- *  \param last The end of the sequence from which to copy.
- *  \param result The beginning of the sequence into which to copy.
- *  \param pred The predicate to test on every value of the range <tt>[first, last)</tt>.
- *  \return <tt>result + n</tt>, where \c n is equal to the number of times \p pred
- *          evaluated to \c true in the range <tt>[first, last)</tt>.
- *
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *                        and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *
- *  \pre The ranges <tt>[first, last)</tt> and <tt>[result, result + (last - first))</tt> shall not overlap.
- *
- *  The following code snippet demonstrates how to use \p copy_if to perform stream compaction
- *  to copy even numbers to an output range.
- *
- *  \code
- *  #include <thrust/copy.h>
- *  ...
- *  struct is_even
- *  {
- *    __host__ __device__
- *    bool operator()(const int x)
- *    {
- *      return (x % 2) == 0;
- *    }
- *  };
- *  ...
- *  const int N = 6;
- *  int V[N] = {-2, 0, -1, 0, 1, 2};
- *  int result[4];
- *
- *  thrust::copy_if(V, V + N, result, is_even());
- *
- *  // V remains {-2, 0, -1, 0, 1, 2}
- *  // result is now {-2, 0, 0, 2}
- *  \endcode
- *
- *  \see \c remove_copy_if
- */
-template<typename InputIterator,
-         typename OutputIterator,
-         typename Predicate>
-  OutputIterator copy_if(InputIterator first,
-                         InputIterator last,
-                         OutputIterator result,
-                         Predicate pred);
-
-
-/*! This version of \p copy_if copies elements from the range <tt>[first,last)</tt>
- *  to a range beginning at \p result, except that any element whose corresponding stencil
- *  element causes \p pred to be \c false is not copied.
- *
- *  More precisely, for every integer \c n such that <tt>0 <= n < last-first</tt>,
- *  \p copy_if performs the assignment <tt>*result = *(first+n)</tt> and \p result
- *  is advanced one position if <tt>pred(*(stencil+n))</tt>. Otherwise, no assignment
- *  occurs and \p result is not advanced.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the sequence from which to copy.
- *  \param last The end of the sequence from which to copy.
- *  \param stencil The beginning of the stencil sequence.
- *  \param result The beginning of the sequence into which to copy.
- *  \param pred The predicate to test on every value of the range <tt>[stencil, stencil + (last-first))</tt>.
- *  \return <tt>result + n</tt>, where \c n is equal to the number of times \p pred
- *          evaluated to \c true in the range <tt>[stencil, stencil + (last-first))</tt>.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *                         and \p InputIterator2's \c value_type is convertible to \p Predicate's \c argument_type.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *
- *  \pre The ranges <tt>[first, last)</tt> and <tt>[result, result + (last - first))</tt> shall not overlap.
- *  \pre The ranges <tt>[stencil, stencil + (last - first))</tt> and <tt>[result, result + (last - first))</tt> shall not overlap.
- *
- *  The following code snippet demonstrates how to use \p copy_if to perform stream compaction
- *  to copy numbers to an output range when corresponding stencil elements are even using the \p thrust::host execution policy:
- *
- *  \code
- *  #include <thrust/copy.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  struct is_even
- *  {
- *    __host__ __device__
- *    bool operator()(const int x)
- *    {
- *      return (x % 2) == 0;
- *    }
- *  };
- *  ...
- *  const int N = 6;
- *  int data[N]    = { 0, 1,  2, 3, 4, 5};
- *  int stencil[N] = {-2, 0, -1, 0, 1, 2};
- *  int result[4];
- *
- *  thrust::copy_if(thrust::host, data, data + N, stencil, result, is_even());
- *
- *  // data remains    = { 0, 1,  2, 3, 4, 5};
- *  // stencil remains = {-2, 0, -1, 0, 1, 2};
- *  // result is now     { 0, 1,  3, 5}
- *  \endcode
- *
- *  \see \c remove_copy_if
- */
-template<typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename OutputIterator, typename Predicate>
-  OutputIterator copy_if(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                         InputIterator1 first,
-                         InputIterator1 last,
-                         InputIterator2 stencil,
-                         OutputIterator result,
-                         Predicate pred);
-
-
-/*! This version of \p copy_if copies elements from the range <tt>[first,last)</tt>
- *  to a range beginning at \p result, except that any element whose corresponding stencil
- *  element causes \p pred to be \c false is not copied.
- *
- *  More precisely, for every integer \c n such that <tt>0 <= n < last-first</tt>,
- *  \p copy_if performs the assignment <tt>*result = *(first+n)</tt> and \p result
- *  is advanced one position if <tt>pred(*(stencil+n))</tt>. Otherwise, no assignment
- *  occurs and \p result is not advanced.
- *
- *  \param first The beginning of the sequence from which to copy.
- *  \param last The end of the sequence from which to copy.
- *  \param stencil The beginning of the stencil sequence.
- *  \param result The beginning of the sequence into which to copy.
- *  \param pred The predicate to test on every value of the range <tt>[stencil, stencil + (last-first))</tt>.
- *  \return <tt>result + n</tt>, where \c n is equal to the number of times \p pred
- *          evaluated to \c true in the range <tt>[stencil, stencil + (last-first))</tt>.
- *
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *                         and \p InputIterator2's \c value_type is convertible to \p Predicate's \c argument_type.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *
- *  \pre The ranges <tt>[first, last)</tt> and <tt>[result, result + (last - first))</tt> shall not overlap.
- *  \pre The ranges <tt>[stencil, stencil + (last - first))</tt> and <tt>[result, result + (last - first))</tt> shall not overlap.
- *
- *  The following code snippet demonstrates how to use \p copy_if to perform stream compaction
- *  to copy numbers to an output range when corresponding stencil elements are even:
- *
- *  \code
- *  #include <thrust/copy.h>
- *  ...
- *  struct is_even
- *  {
- *    __host__ __device__
- *    bool operator()(const int x)
- *    {
- *      return (x % 2) == 0;
- *    }
- *  };
- *  ...
- *  const int N = 6;
- *  int data[N]    = { 0, 1,  2, 3, 4, 5};
- *  int stencil[N] = {-2, 0, -1, 0, 1, 2};
- *  int result[4];
- *
- *  thrust::copy_if(data, data + N, stencil, result, is_even());
- *
- *  // data remains    = { 0, 1,  2, 3, 4, 5};
- *  // stencil remains = {-2, 0, -1, 0, 1, 2};
- *  // result is now     { 0, 1,  3, 5}
- *  \endcode
- *
- *  \see \c remove_copy_if
- */
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename Predicate>
-  OutputIterator copy_if(InputIterator1 first,
-                         InputIterator1 last,
-                         InputIterator2 stencil,
-                         OutputIterator result,
-                         Predicate pred);
-
-/*! \} // end stream_compaction
- */
-	
-} // end namespace thrust
-
-#include <thrust/detail/copy.h>
-#include <thrust/detail/copy_if.h>
-
diff --git a/compat/thrust/count.h b/compat/thrust/count.h
deleted file mode 100644
index cddd1dd68b..0000000000
--- a/compat/thrust/count.h
+++ /dev/null
@@ -1,231 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file count.h
- *  \brief Counting elements in a range
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/execution_policy.h>
-#include <thrust/iterator/iterator_traits.h>
-
-namespace thrust
-{
-
-
-/*! \addtogroup algorithms
- */
-
-/*! \addtogroup reductions
- *  \ingroup algorithms
- *  \{
- */
-
-/*! \addtogroup counting
- *  \ingroup reductions
- *  \{
- */
-
-
-/*! \p count finds the number of elements in <tt>[first,last)</tt> that are equal
- *  to \p value. More precisely, \p count returns the number of iterators \c i in
- *  <tt>[first, last)</tt> such that <tt>*i == value</tt>.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the sequence.
- *  \param last The end of the sequence.
- *  \param value The value to be counted.
- *  \return The number of elements equal to \p value.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator must be a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a> and \c InputIterator's \c value_type must be a model of <a href="http://www.sgi.com/tech/stl/EqualityComparable.html">Equality Comparable</a>.
- *  \tparam EqualityComparable must be a model of <a href="http://www.sgi.com/tech/stl/EqualityComparable.html">Equality Comparable</a> and can be compared for equality with \c InputIterator's \c value_type
- *
- *  The following code snippet demonstrates how to use \p count to 
- *  count the number of instances in a range of a value of interest using the \p thrust::device execution policy:
- *
- *  \code
- *  #include <thrust/count.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  // put 3 1s in a device_vector
- *  thrust::device_vector<int> vec(5,0);
- *  vec[1] = 1;
- *  vec[3] = 1;
- *  vec[4] = 1;
- *  
- *  // count the 1s
- *  int result = thrust::count(thrust::device, vec.begin(), vec.end(), 1);
- *  // result == 3
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/count.html
- */
-template<typename DerivedPolicy, typename InputIterator, typename EqualityComparable>
-  typename thrust::iterator_traits<InputIterator>::difference_type
-    count(const thrust::detail::execution_policy_base<DerivedPolicy> &exec, InputIterator first, InputIterator last, const EqualityComparable& value);
-
-
-
-/*! \p count finds the number of elements in <tt>[first,last)</tt> that are equal
- *  to \p value. More precisely, \p count returns the number of iterators \c i in
- *  <tt>[first, last)</tt> such that <tt>*i == value</tt>.
- *
- *  \param first The beginning of the sequence.
- *  \param last The end of the sequence.
- *  \param value The value to be counted.
- *  \return The number of elements equal to \p value.
- *
- *  \tparam InputIterator must be a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a> and \c InputIterator's \c value_type must be a model of <a href="http://www.sgi.com/tech/stl/EqualityComparable.html">Equality Comparable</a>.
- *  \tparam EqualityComparable must be a model of <a href="http://www.sgi.com/tech/stl/EqualityComparable.html">Equality Comparable</a> and can be compared for equality with \c InputIterator's \c value_type
- *
- *  The following code snippet demonstrates how to use \p count to 
- *  count the number of instances in a range of a value of interest.
- *  \code
- *  #include <thrust/count.h>
- *  #include <thrust/device_vector.h>
- *  ...
- *  // put 3 1s in a device_vector
- *  thrust::device_vector<int> vec(5,0);
- *  vec[1] = 1;
- *  vec[3] = 1;
- *  vec[4] = 1;
- *  
- *  // count the 1s
- *  int result = thrust::count(vec.begin(), vec.end(), 1);
- *  // result == 3
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/count.html
- */
-template <typename InputIterator, typename EqualityComparable>
-  typename thrust::iterator_traits<InputIterator>::difference_type
-    count(InputIterator first, InputIterator last, const EqualityComparable& value);
-
-
-/*! \p count_if finds the number of elements in <tt>[first,last)</tt> for which 
- *  a predicate is \c true. More precisely, \p count_if returns the number of iterators
- *  \c i in <tt>[first, last)</tt> such that <tt>pred(*i) == true</tt>.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the sequence.
- *  \param last The end of the sequence.
- *  \param pred The predicate.
- *  \return The number of elements where \p pred is \c true.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator must be a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a> and \c InputIterator's \c value_type must be convertible to \c Predicate's \c argument_type.
- *  \tparam Predicate must be a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *
- *  The following code snippet demonstrates how to use \p count_if to
- *  count the number of odd numbers in a range using the \p thrust::device execution policy:
- *
- *  \code
- *  #include <thrust/count.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  struct is_odd
- *  {
- *    __host__ __device__
- *    bool operator()(int &x)
- *    {
- *      return x & 1;
- *    }
- *  };
- *  ...
- *  // fill a device_vector with even & odd numbers
- *  thrust::device_vector<int> vec(5);
- *  vec[0] = 0;
- *  vec[1] = 1;
- *  vec[2] = 2;
- *  vec[3] = 3;
- *  vec[4] = 4;
- *
- *  // count the odd elements in vec
- *  int result = thrust::count_if(thrust::device, vec.begin(), vec.end(), is_odd());
- *  // result == 2
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/count.html
- */
-template<typename DerivedPolicy, typename InputIterator, typename Predicate>
-  typename thrust::iterator_traits<InputIterator>::difference_type
-    count_if(const thrust::detail::execution_policy_base<DerivedPolicy> &exec, InputIterator first, InputIterator last, Predicate pred);
-
-
-/*! \p count_if finds the number of elements in <tt>[first,last)</tt> for which 
- *  a predicate is \c true. More precisely, \p count_if returns the number of iterators
- *  \c i in <tt>[first, last)</tt> such that <tt>pred(*i) == true</tt>.
- *
- *  \param first The beginning of the sequence.
- *  \param last The end of the sequence.
- *  \param pred The predicate.
- *  \return The number of elements where \p pred is \c true.
- *
- *  \tparam InputIterator must be a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a> and \c InputIterator's \c value_type must be convertible to \c Predicate's \c argument_type.
- *  \tparam Predicate must be a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *
- *  The following code snippet demonstrates how to use \p count_if to
- *  count the number of odd numbers in a range.
- *  \code
- *  #include <thrust/count.h>
- *  #include <thrust/device_vector.h>
- *  ...
- *  struct is_odd
- *  {
- *    __host__ __device__
- *    bool operator()(int &x)
- *    {
- *      return x & 1;
- *    }
- *  };
- *  ...
- *  // fill a device_vector with even & odd numbers
- *  thrust::device_vector<int> vec(5);
- *  vec[0] = 0;
- *  vec[1] = 1;
- *  vec[2] = 2;
- *  vec[3] = 3;
- *  vec[4] = 4;
- *
- *  // count the odd elements in vec
- *  int result = thrust::count_if(vec.begin(), vec.end(), is_odd());
- *  // result == 2
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/count.html
- */
-template <typename InputIterator, typename Predicate>
-  typename thrust::iterator_traits<InputIterator>::difference_type
-    count_if(InputIterator first, InputIterator last, Predicate pred);
-
-/*! \} // end counting
- *  \} // end reductions
- */
-
-} // end thrust
-
-#include <thrust/detail/count.inl>
-
diff --git a/compat/thrust/detail/adjacent_difference.inl b/compat/thrust/detail/adjacent_difference.inl
deleted file mode 100644
index 6590f9d15d..0000000000
--- a/compat/thrust/detail/adjacent_difference.inl
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file adjacent_difference.inl
- *  \brief Inline file for adjacent_difference.h
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/select_system.h>
-#include <thrust/system/detail/generic/adjacent_difference.h>
-#include <thrust/system/detail/adl/adjacent_difference.h>
-
-namespace thrust
-{
-
-
-template <typename DerivedPolicy, typename InputIterator, typename OutputIterator>
-OutputIterator adjacent_difference(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                   InputIterator first, InputIterator last, 
-                                   OutputIterator result)
-{
-  using thrust::system::detail::generic::adjacent_difference;
-
-  return adjacent_difference(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result);
-} // end adjacent_difference()
-
-
-template <typename DerivedPolicy, typename InputIterator, typename OutputIterator, typename BinaryFunction>
-OutputIterator adjacent_difference(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                   InputIterator first, InputIterator last, 
-                                   OutputIterator result,
-                                   BinaryFunction binary_op)
-{
-  using thrust::system::detail::generic::adjacent_difference;
-
-  return adjacent_difference(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, binary_op);
-} // end adjacent_difference()
-
-
-template <typename InputIterator, typename OutputIterator>
-OutputIterator adjacent_difference(InputIterator first, InputIterator last, 
-                                   OutputIterator result)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator>::type  System1;
-  typedef typename thrust::iterator_system<OutputIterator>::type System2;
-
-  System1 system1;
-  System2 system2;
-
-  return thrust::adjacent_difference(select_system(system1, system2), first, last, result);
-} // end adjacent_difference()
-
-
-template <typename InputIterator, typename OutputIterator, typename BinaryFunction>
-OutputIterator adjacent_difference(InputIterator first, InputIterator last,
-                                   OutputIterator result,
-                                   BinaryFunction binary_op)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator>::type  System1;
-  typedef typename thrust::iterator_system<OutputIterator>::type System2;
-
-  System1 system1;
-  System2 system2;
-
-  return thrust::adjacent_difference(select_system(system1, system2), first, last, result, binary_op);
-} // end adjacent_difference()
-
-
-} // end namespace thrust
-
diff --git a/compat/thrust/detail/advance.inl b/compat/thrust/detail/advance.inl
deleted file mode 100644
index 2907be7534..0000000000
--- a/compat/thrust/detail/advance.inl
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file advance.inl
- *  \brief Inline file for advance.h
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/advance.h>
-#include <thrust/system/detail/generic/advance.h>
-
-namespace thrust
-{
-
-
-template <typename InputIterator, typename Distance>
-void advance(InputIterator& i, Distance n)
-{
-  thrust::system::detail::generic::advance(i, n);
-} // end advance()
-
-
-} // end namespace thrust
-
diff --git a/compat/thrust/detail/allocator/allocator_traits.h b/compat/thrust/detail/allocator/allocator_traits.h
deleted file mode 100644
index 6ee99b453f..0000000000
--- a/compat/thrust/detail/allocator/allocator_traits.h
+++ /dev/null
@@ -1,240 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/type_traits/pointer_traits.h>
-#include <thrust/detail/type_traits/has_nested_type.h>
-#include <thrust/detail/type_traits.h>
-
-namespace thrust
-{
-namespace detail
-{
-namespace allocator_traits_detail
-{
-
-__THRUST_DEFINE_HAS_NESTED_TYPE(has_pointer, pointer)
-__THRUST_DEFINE_HAS_NESTED_TYPE(has_const_pointer, const_pointer)
-__THRUST_DEFINE_HAS_NESTED_TYPE(has_reference, reference)
-__THRUST_DEFINE_HAS_NESTED_TYPE(has_const_reference, const_reference)
-__THRUST_DEFINE_HAS_NESTED_TYPE(has_void_pointer, void_pointer)
-__THRUST_DEFINE_HAS_NESTED_TYPE(has_const_void_pointer, const_void_pointer)
-__THRUST_DEFINE_HAS_NESTED_TYPE(has_difference_type, difference_type)
-__THRUST_DEFINE_HAS_NESTED_TYPE(has_size_type, size_type)
-__THRUST_DEFINE_HAS_NESTED_TYPE(has_propagate_on_container_copy_assignment, propagate_on_container_copy_assignment)
-__THRUST_DEFINE_HAS_NESTED_TYPE(has_propagate_on_container_move_assignment, propagate_on_container_move_assignment)
-__THRUST_DEFINE_HAS_NESTED_TYPE(has_propagate_on_container_swap, propagate_on_container_swap)
-__THRUST_DEFINE_HAS_NESTED_TYPE(has_system_type, system_type)
-
-template<typename T>
-  struct nested_pointer
-{
-  typedef typename T::pointer type;
-};
-
-template<typename T>
-  struct nested_const_pointer
-{
-  typedef typename T::const_pointer type;
-};
-
-template<typename T>
-  struct nested_reference
-{
-  typedef typename T::reference type;
-};
-
-template<typename T>
-  struct nested_const_reference
-{
-  typedef typename T::const_reference type;
-};
-
-template<typename T>
-  struct nested_void_pointer
-{
-  typedef typename T::void_pointer type;
-};
-
-template<typename T>
-  struct nested_const_void_pointer
-{
-  typedef typename T::const_void_pointer type;
-};
-
-template<typename T>
-  struct nested_difference_type
-{
-  typedef typename T::difference_type type;
-};
-
-template<typename T>
-  struct nested_size_type
-{
-  typedef typename T::size_type type;
-};
-
-template<typename T>
-  struct nested_propagate_on_container_copy_assignment
-{
-  typedef typename T::propagate_on_container_copy_assignment type;
-};
-
-template<typename T>
-  struct nested_propagate_on_container_move_assignment
-{
-  typedef typename T::propagate_on_container_move_assignment type;
-};
-
-template<typename T>
-  struct nested_propagate_on_container_swap
-{
-  typedef typename T::propagate_on_container_swap type;
-};
-
-template<typename T>
-  struct nested_system_type
-{
-  typedef typename T::system_type type;
-};
-
-} // end allocator_traits_detail
-
-
-template<typename Alloc>
-  struct allocator_traits
-{
-  typedef Alloc allocator_type;
-
-  typedef typename allocator_type::value_type value_type;
-
-  typedef typename eval_if<
-    allocator_traits_detail::has_pointer<allocator_type>::value,
-    allocator_traits_detail::nested_pointer<allocator_type>,
-    identity_<value_type*>
-  >::type pointer;
-
-  private:
-    template<typename T>
-      struct rebind_pointer
-    {
-      typedef typename pointer_traits<pointer>::template rebind<T>::other type;
-    };
-
-  public:
-
-  typedef typename eval_if<
-    allocator_traits_detail::has_const_pointer<allocator_type>::value,
-    allocator_traits_detail::nested_const_pointer<allocator_type>,
-    rebind_pointer<const value_type>
-  >::type const_pointer;
-
-  typedef typename eval_if<
-    allocator_traits_detail::has_void_pointer<allocator_type>::value,
-    allocator_traits_detail::nested_void_pointer<allocator_type>,
-    rebind_pointer<void>
-  >::type void_pointer;
-
-  typedef typename eval_if<
-    allocator_traits_detail::has_const_void_pointer<allocator_type>::value,
-    allocator_traits_detail::nested_const_void_pointer<allocator_type>,
-    rebind_pointer<const void>
-  >::type const_void_pointer;
-
-  typedef typename eval_if<
-    allocator_traits_detail::has_difference_type<allocator_type>::value,
-    allocator_traits_detail::nested_difference_type<allocator_type>,
-    pointer_difference<pointer>
-  >::type difference_type;
-
-  typedef typename eval_if<
-    allocator_traits_detail::has_size_type<allocator_type>::value,
-    allocator_traits_detail::nested_size_type<allocator_type>,
-    make_unsigned<difference_type>
-  >::type size_type;
-
-  typedef typename eval_if<
-    allocator_traits_detail::has_propagate_on_container_copy_assignment<allocator_type>::value,
-    allocator_traits_detail::nested_propagate_on_container_copy_assignment<allocator_type>,
-    identity_<false_type>
-  >::type propagate_on_container_copy_assignment;
-
-  typedef typename eval_if<
-    allocator_traits_detail::has_propagate_on_container_move_assignment<allocator_type>::value,
-    allocator_traits_detail::nested_propagate_on_container_move_assignment<allocator_type>,
-    identity_<false_type>
-  >::type propagate_on_container_move_assignment;
-
-  typedef typename eval_if<
-    allocator_traits_detail::has_propagate_on_container_swap<allocator_type>::value,
-    allocator_traits_detail::nested_propagate_on_container_swap<allocator_type>,
-    identity_<false_type>
-  >::type propagate_on_container_swap;
-
-  typedef typename eval_if<
-    allocator_traits_detail::has_system_type<allocator_type>::value,
-    allocator_traits_detail::nested_system_type<allocator_type>,
-    thrust::iterator_system<pointer>
-  >::type system_type;
-
-  // XXX rebind and rebind_traits are alias templates
-  //     and so are omitted while c++11 is unavailable
-
-  inline static pointer allocate(allocator_type &a, size_type n);
-
-  inline static pointer allocate(allocator_type &a, size_type n, const_void_pointer hint);
-
-  inline static void deallocate(allocator_type &a, pointer p, size_type n);
-
-  // XXX should probably change T* to pointer below and then relax later
-
-  template<typename T>
-  inline __host__ __device__ static void construct(allocator_type &a, T *p);
-  
-  template<typename T, typename Arg1>
-  inline __host__ __device__ static void construct(allocator_type &a, T *p, const Arg1 &arg1);
-
-  template<typename T>
-  inline __host__ __device__ static void destroy(allocator_type &a, T *p);
-
-  inline static size_type max_size(const allocator_type &a);
-}; // end allocator_traits
-
-
-// XXX consider moving this non-standard functionality inside allocator_traits
-template<typename Alloc>
-  struct allocator_system
-{
-  // the type of the allocator's system
-  typedef typename eval_if<
-    allocator_traits_detail::has_system_type<Alloc>::value,
-    allocator_traits_detail::nested_system_type<Alloc>,
-    thrust::iterator_system<
-      typename allocator_traits<Alloc>::pointer
-    >
-  >::type type;
-
-  inline static type &get(Alloc &a);
-};
-
-
-} // end detail
-} // end thrust
-
-#include <thrust/detail/allocator/allocator_traits.inl>
-
diff --git a/compat/thrust/detail/allocator/allocator_traits.inl b/compat/thrust/detail/allocator/allocator_traits.inl
deleted file mode 100644
index 83193355d5..0000000000
--- a/compat/thrust/detail/allocator/allocator_traits.inl
+++ /dev/null
@@ -1,287 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/allocator/allocator_traits.h>
-#include <thrust/detail/type_traits/has_member_function.h>
-#include <thrust/detail/type_traits/is_call_possible.h>
-#include <new>
-#include <limits>
-
-namespace thrust
-{
-namespace detail
-{
-namespace allocator_traits_detail
-{
-
-__THRUST_DEFINE_IS_CALL_POSSIBLE(has_member_allocate_with_hint_impl, allocate)
-
-template<typename Alloc>
-  class has_member_allocate_with_hint
-{
-  typedef typename allocator_traits<Alloc>::pointer            pointer;
-  typedef typename allocator_traits<Alloc>::size_type          size_type;
-  typedef typename allocator_traits<Alloc>::const_void_pointer const_void_pointer;
-
-  public:
-    typedef typename has_member_allocate_with_hint_impl<Alloc, pointer(size_type,const_void_pointer)>::type type;
-    static const bool value = type::value;
-};
-
-template<typename Alloc>
-  typename enable_if<
-    has_member_allocate_with_hint<Alloc>::value,
-    typename allocator_traits<Alloc>::pointer
-  >::type
-    allocate(Alloc &a, typename allocator_traits<Alloc>::size_type n, typename allocator_traits<Alloc>::const_void_pointer hint)
-{
-  return a.allocate(n,hint);
-}
-
-template<typename Alloc>
-  typename disable_if<
-    has_member_allocate_with_hint<Alloc>::value,
-    typename allocator_traits<Alloc>::pointer
-  >::type
-    allocate(Alloc &a, typename allocator_traits<Alloc>::size_type n, typename allocator_traits<Alloc>::const_void_pointer)
-{
-  return a.allocate(n);
-}
-
-
-__THRUST_DEFINE_IS_CALL_POSSIBLE(has_member_construct1_impl, construct)
-
-template<typename Alloc, typename T>
-  struct has_member_construct1
-    : has_member_construct1_impl<Alloc, void(T*)>
-{};
-
-template<typename Alloc, typename T>
-  inline __host__ __device__
-    typename enable_if<
-      has_member_construct1<Alloc,T>::value
-    >::type
-      construct(Alloc &a, T *p)
-{
-  a.construct(p);
-}
-
-template<typename Alloc, typename T>
-  inline __host__ __device__
-    typename disable_if<
-      has_member_construct1<Alloc,T>::value
-    >::type
-      construct(Alloc &a, T *p)
-{
-  ::new(static_cast<void*>(p)) T();
-}
-
-
-__THRUST_DEFINE_IS_CALL_POSSIBLE(has_member_construct2_impl, construct)
-
-template<typename Alloc, typename T, typename Arg1>
-  struct has_member_construct2
-    : has_member_construct2_impl<Alloc, void(T*,const Arg1 &)>
-{};
-
-template<typename Alloc, typename T, typename Arg1>
-  inline __host__ __device__
-    typename enable_if<
-      has_member_construct2<Alloc,T,Arg1>::value
-    >::type
-      construct(Alloc &a, T *p, const Arg1 &arg1)
-{
-  a.construct(p,arg1);
-}
-
-template<typename Alloc, typename T, typename Arg1>
-  inline __host__ __device__
-    typename disable_if<
-      has_member_construct2<Alloc,T,Arg1>::value
-    >::type
-      construct(Alloc &, T *p, const Arg1 &arg1)
-{
-  ::new(static_cast<void*>(p)) T(arg1);
-}
-
-
-__THRUST_DEFINE_IS_CALL_POSSIBLE(has_member_destroy_impl, destroy)
-
-template<typename Alloc, typename T>
-  struct has_member_destroy
-    : has_member_destroy_impl<Alloc, void(T*)>
-{};
-
-template<typename Alloc, typename T>
-  inline __host__ __device__
-    typename enable_if<
-      has_member_destroy<Alloc,T>::value
-    >::type
-      destroy(Alloc &a, T *p)
-{
-  a.destroy(p);
-}
-
-template<typename Alloc, typename T>
-  inline __host__ __device__
-    typename disable_if<
-      has_member_destroy<Alloc,T>::value
-    >::type
-      destroy(Alloc &, T *p)
-{
-  p->~T();
-}
-
-
-__THRUST_DEFINE_IS_CALL_POSSIBLE(has_member_max_size_impl, max_size)
-
-template<typename Alloc>
-  class has_member_max_size
-{
-  typedef typename allocator_traits<Alloc>::size_type size_type;
-
-  public:
-    typedef typename has_member_max_size_impl<Alloc, size_type(void)>::type type;
-    static const bool value = type::value;
-};
-
-template<typename Alloc>
-  typename enable_if<
-    has_member_max_size<Alloc>::value,
-    typename allocator_traits<Alloc>::size_type
-  >::type
-    max_size(const Alloc &a)
-{
-  return a.max_size();
-}
-
-template<typename Alloc>
-  typename disable_if<
-    has_member_max_size<Alloc>::value,
-    typename allocator_traits<Alloc>::size_type
-  >::type
-    max_size(const Alloc &a)
-{
-  typedef typename allocator_traits<Alloc>::size_type size_type;
-  return std::numeric_limits<size_type>::max();
-}
-
-__THRUST_DEFINE_HAS_MEMBER_FUNCTION(has_member_system_impl, system)
-
-template<typename Alloc>
-  class has_member_system
-{
-  typedef typename allocator_system<Alloc>::type system_type;
-
-  public:
-    typedef typename has_member_system_impl<Alloc, system_type&(void)>::type type;
-    static const bool value = type::value;
-};
-
-template<typename Alloc>
-  typename enable_if<
-    has_member_system<Alloc>::value,
-    typename allocator_system<Alloc>::type &
-  >::type
-    system(Alloc &a)
-{
-  return a.system();
-}
-
-template<typename Alloc>
-  typename disable_if<
-    has_member_system<Alloc>::value,
-    typename allocator_system<Alloc>::type &
-  >::type
-    system(Alloc &a)
-{
-  // assumes the system is default-constructible
-  static typename allocator_system<Alloc>::type state;
-  return state;
-}
-
-
-} // end allocator_traits_detail
-
-
-template<typename Alloc>
-  typename allocator_traits<Alloc>::pointer
-    allocator_traits<Alloc>
-      ::allocate(Alloc &a, typename allocator_traits<Alloc>::size_type n)
-{
-  return a.allocate(n);
-}
-
-template<typename Alloc>
-  typename allocator_traits<Alloc>::pointer
-    allocator_traits<Alloc>
-      ::allocate(Alloc &a, typename allocator_traits<Alloc>::size_type n, typename allocator_traits<Alloc>::const_void_pointer hint)
-{
-  return allocator_traits_detail::allocate(a, n, hint);
-}
-
-template<typename Alloc>
-  void allocator_traits<Alloc>
-    ::deallocate(Alloc &a, typename allocator_traits<Alloc>::pointer p, typename allocator_traits<Alloc>::size_type n)
-{
-  return a.deallocate(p,n);
-}
-
-template<typename Alloc>
-  template<typename T>
-    void allocator_traits<Alloc>
-      ::construct(allocator_type &a, T *p)
-{
-  return allocator_traits_detail::construct(a,p);
-}
-
-template<typename Alloc>
-  template<typename T, typename Arg1>
-    void allocator_traits<Alloc>
-      ::construct(allocator_type &a, T *p, const Arg1 &arg1)
-{
-  return allocator_traits_detail::construct(a,p,arg1);
-}
-
-template<typename Alloc>
-  template<typename T>
-    void allocator_traits<Alloc>
-      ::destroy(allocator_type &a, T *p)
-{
-  return allocator_traits_detail::destroy(a,p);
-}
-
-template<typename Alloc>
-  typename allocator_traits<Alloc>::size_type
-    allocator_traits<Alloc>
-      ::max_size(const allocator_type &a)
-{
-  return allocator_traits_detail::max_size(a);
-}
-
-template<typename Alloc>
-  typename allocator_system<Alloc>::type &
-    allocator_system<Alloc>
-      ::get(Alloc &a)
-{
-  return allocator_traits_detail::system(a);
-}
-
-
-} // end detail
-} // end thrust
-
diff --git a/compat/thrust/detail/allocator/copy_construct_range.h b/compat/thrust/detail/allocator/copy_construct_range.h
deleted file mode 100644
index 5d99e1fa14..0000000000
--- a/compat/thrust/detail/allocator/copy_construct_range.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/execution_policy.h>
-
-namespace thrust
-{
-namespace detail
-{
-
-template<typename System, typename Allocator, typename InputIterator, typename Pointer>
-  Pointer copy_construct_range(thrust::execution_policy<System> &from_system,
-                               Allocator &a,
-                               InputIterator first,
-                               InputIterator last,
-                               Pointer result);
-
-template<typename System, typename Allocator, typename InputIterator, typename Size, typename Pointer>
-  Pointer copy_construct_range_n(thrust::execution_policy<System> &from_system,
-                                 Allocator &a,
-                                 InputIterator first,
-                                 Size n,
-                                 Pointer result);
-
-} // end detail
-} // end thrust
-
-#include <thrust/detail/allocator/copy_construct_range.inl>
-
diff --git a/compat/thrust/detail/allocator/copy_construct_range.inl b/compat/thrust/detail/allocator/copy_construct_range.inl
deleted file mode 100644
index 7c5478b65b..0000000000
--- a/compat/thrust/detail/allocator/copy_construct_range.inl
+++ /dev/null
@@ -1,298 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/allocator/allocator_traits.h>
-#include <thrust/detail/type_traits/pointer_traits.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/detail/copy.h>
-#include <thrust/tuple.h>
-#include <thrust/advance.h>
-#include <thrust/distance.h>
-#include <thrust/iterator/zip_iterator.h>
-#include <thrust/for_each.h>
-#include <memory>
-
-namespace thrust
-{
-namespace detail
-{
-namespace allocator_traits_detail
-{
-
-
-template<typename Allocator, typename InputType, typename OutputType>
-  struct copy_construct_with_allocator
-{
-  Allocator &a;
-
-  copy_construct_with_allocator(Allocator &a)
-    : a(a)
-  {}
-
-  template<typename Tuple>
-  inline __host__ __device__
-  void operator()(Tuple t)
-  {
-    const InputType &in = thrust::get<0>(t);
-    OutputType &out = thrust::get<1>(t);
-
-    allocator_traits<Allocator>::construct(a, &out, in);
-  }
-};
-
-
-template<typename Allocator, typename T>
-  struct needs_copy_construct_via_allocator
-    : has_member_construct2<
-        Allocator,
-        T,
-        T
-      >
-{};
-
-
-// we know that std::allocator::construct's only effect is to call T's
-// copy constructor, so we needn't use it for copy construction
-template<typename U, typename T>
-  struct needs_copy_construct_via_allocator<std::allocator<U>, T>
-    : thrust::detail::false_type
-{};
-
-
-// XXX it's regrettable that this implementation is copied almost
-//     exactly from system::detail::generic::uninitialized_copy
-//     perhaps generic::uninitialized_copy could call this routine
-//     with a default allocator
-template<typename Allocator, typename FromSystem, typename ToSystem, typename InputIterator, typename Pointer>
-  typename enable_if_convertible<
-    FromSystem,
-    ToSystem,
-    Pointer
-  >::type
-    uninitialized_copy_with_allocator(Allocator &a,
-                                      thrust::execution_policy<FromSystem> &from_system,
-                                      thrust::execution_policy<ToSystem> &to_system,
-                                      InputIterator first,
-                                      InputIterator last,
-                                      Pointer result)
-{
-  // zip up the iterators
-  typedef thrust::tuple<InputIterator,Pointer> IteratorTuple;
-  typedef thrust::zip_iterator<IteratorTuple>  ZipIterator;
-
-  ZipIterator begin = thrust::make_zip_iterator(thrust::make_tuple(first,result));
-  ZipIterator end = begin;
-
-  // get a zip_iterator pointing to the end
-  const typename thrust::iterator_difference<InputIterator>::type n = thrust::distance(first,last);
-  thrust::advance(end,n);
-
-  // create a functor
-  typedef typename iterator_traits<InputIterator>::value_type InputType;
-  typedef typename iterator_traits<Pointer>::value_type       OutputType;
-
-  // do the for_each
-  // note we use to_system to dispatch the for_each
-  thrust::for_each(to_system, begin, end, copy_construct_with_allocator<Allocator,InputType,OutputType>(a));
-
-  // return the end of the output range
-  return thrust::get<1>(end.get_iterator_tuple());
-}
-
-
-// XXX it's regrettable that this implementation is copied almost
-//     exactly from system::detail::generic::uninitialized_copy_n
-//     perhaps generic::uninitialized_copy_n could call this routine
-//     with a default allocator
-template<typename Allocator, typename FromSystem, typename ToSystem, typename InputIterator, typename Size, typename Pointer>
-  typename enable_if_convertible<
-    FromSystem,
-    ToSystem,
-    Pointer
-  >::type
-    uninitialized_copy_with_allocator_n(Allocator &a,
-                                        thrust::execution_policy<FromSystem> &from_system,
-                                        thrust::execution_policy<ToSystem> &to_system,
-                                        InputIterator first,
-                                        Size n,
-                                        Pointer result)
-{
-  // zip up the iterators
-  typedef thrust::tuple<InputIterator,Pointer> IteratorTuple;
-  typedef thrust::zip_iterator<IteratorTuple>  ZipIterator;
-
-  ZipIterator begin = thrust::make_zip_iterator(thrust::make_tuple(first,result));
-
-  // create a functor
-  typedef typename iterator_traits<InputIterator>::value_type InputType;
-  typedef typename iterator_traits<Pointer>::value_type       OutputType;
-
-  // do the for_each_n
-  // note we use to_system to dispatch the for_each_n
-  ZipIterator end = thrust::for_each_n(to_system, begin, n, copy_construct_with_allocator<Allocator,InputType,OutputType>(a));
-
-  // return the end of the output range
-  return thrust::get<1>(end.get_iterator_tuple());
-}
-
-
-template<typename Allocator, typename FromSystem, typename ToSystem, typename InputIterator, typename Pointer>
-  typename disable_if_convertible<
-    FromSystem,
-    ToSystem,
-    Pointer
-  >::type
-    uninitialized_copy_with_allocator(Allocator &,
-                                      thrust::execution_policy<FromSystem> &from_system,
-                                      thrust::execution_policy<ToSystem> &to_system,
-                                      InputIterator first,
-                                      InputIterator last,
-                                      Pointer result)
-{
-  // the systems aren't trivially interoperable
-  // just call two_system_copy and hope for the best
-  return thrust::detail::two_system_copy(from_system, to_system, first, last, result);
-} // end uninitialized_copy_with_allocator()
-
-
-template<typename Allocator, typename FromSystem, typename ToSystem, typename InputIterator, typename Size, typename Pointer>
-  typename disable_if_convertible<
-    FromSystem,
-    ToSystem,
-    Pointer
-  >::type
-    uninitialized_copy_with_allocator_n(Allocator &,
-                                        thrust::execution_policy<FromSystem> &from_system,
-                                        thrust::execution_policy<ToSystem> &to_system,
-                                        InputIterator first,
-                                        Size n,
-                                        Pointer result)
-{
-  // the systems aren't trivially interoperable
-  // just call two_system_copy_n and hope for the best
-  return thrust::detail::two_system_copy_n(from_system, to_system, first, n, result);
-} // end uninitialized_copy_with_allocator_n()
-
-
-template<typename FromSystem, typename Allocator, typename InputIterator, typename Pointer>
-  typename disable_if<
-    needs_copy_construct_via_allocator<
-      Allocator,
-      typename pointer_element<Pointer>::type
-    >::value,
-    Pointer
-  >::type
-    copy_construct_range(thrust::execution_policy<FromSystem> &from_system,
-                         Allocator &a,
-                         InputIterator first,
-                         InputIterator last,
-                         Pointer result)
-{
-  typename allocator_system<Allocator>::type &to_system = allocator_system<Allocator>::get(a);
-
-  // just call two_system_copy
-  return thrust::detail::two_system_copy(from_system, to_system, first, last, result);
-}
-
-
-template<typename FromSystem, typename Allocator, typename InputIterator, typename Size, typename Pointer>
-  typename disable_if<
-    needs_copy_construct_via_allocator<
-      Allocator,
-      typename pointer_element<Pointer>::type
-    >::value,
-    Pointer
-  >::type
-    copy_construct_range_n(thrust::execution_policy<FromSystem> &from_system,
-                           Allocator &a,
-                           InputIterator first,
-                           Size n,
-                           Pointer result)
-{
-  typename allocator_system<Allocator>::type &to_system = allocator_system<Allocator>::get(a);
-
-  // just call two_system_copy_n
-  return thrust::detail::two_system_copy_n(from_system, to_system, first, n, result);
-}
-
-
-template<typename FromSystem, typename Allocator, typename InputIterator, typename Pointer>
-  typename enable_if<
-    needs_copy_construct_via_allocator<
-      Allocator,
-      typename pointer_element<Pointer>::type
-    >::value,
-    Pointer
-  >::type
-    copy_construct_range(thrust::execution_policy<FromSystem> &from_system,
-                         Allocator &a,
-                         InputIterator first,
-                         InputIterator last,
-                         Pointer result)
-{
-  typename allocator_system<Allocator>::type &to_system = allocator_system<Allocator>::get(a);
-  return uninitialized_copy_with_allocator(a, from_system, to_system, first, last, result);
-}
-
-
-template<typename FromSystem, typename Allocator, typename InputIterator, typename Size, typename Pointer>
-  typename enable_if<
-    needs_copy_construct_via_allocator<
-      Allocator,
-      typename pointer_element<Pointer>::type
-    >::value,
-    Pointer
-  >::type
-    copy_construct_range_n(thrust::execution_policy<FromSystem> &from_system,
-                           Allocator &a,
-                           InputIterator first,
-                           Size n,
-                           Pointer result)
-{
-  typename allocator_system<Allocator>::type &to_system = allocator_system<Allocator>::get(a);
-  return uninitialized_copy_with_allocator_n(a, from_system, to_system, first, n, result);
-}
-
-
-} // end allocator_traits_detail
-
-
-template<typename System, typename Allocator, typename InputIterator, typename Pointer>
-  Pointer copy_construct_range(thrust::execution_policy<System> &from_system,
-                               Allocator &a,
-                               InputIterator first,
-                               InputIterator last,
-                               Pointer result)
-{
-  return allocator_traits_detail::copy_construct_range(from_system, a, first, last, result);
-}
-
-
-template<typename System, typename Allocator, typename InputIterator, typename Size, typename Pointer>
-  Pointer copy_construct_range_n(thrust::execution_policy<System> &from_system,
-                                 Allocator &a,
-                                 InputIterator first,
-                                 Size n,
-                                 Pointer result)
-{
-  return allocator_traits_detail::copy_construct_range_n(from_system, a, first, n, result);
-}
-
-
-} // end detail
-} // end thrust
-
diff --git a/compat/thrust/detail/allocator/default_construct_range.h b/compat/thrust/detail/allocator/default_construct_range.h
deleted file mode 100644
index d83cb31f35..0000000000
--- a/compat/thrust/detail/allocator/default_construct_range.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-namespace thrust
-{
-namespace detail
-{
-
-
-template<typename Allocator, typename Pointer, typename Size>
-inline void default_construct_range(Allocator &a, Pointer p, Size n);
-
-
-} // end detail
-} // end thrust
-
-#include <thrust/detail/allocator/default_construct_range.inl>
-
-
diff --git a/compat/thrust/detail/allocator/default_construct_range.inl b/compat/thrust/detail/allocator/default_construct_range.inl
deleted file mode 100644
index 45fe9c69a1..0000000000
--- a/compat/thrust/detail/allocator/default_construct_range.inl
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/allocator/allocator_traits.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/detail/type_traits/pointer_traits.h>
-#include <thrust/for_each.h>
-#include <thrust/uninitialized_fill.h>
-
-namespace thrust
-{
-namespace detail
-{
-namespace allocator_traits_detail
-{
-
-
-template<typename Allocator>
-  struct construct1_via_allocator
-{
-  Allocator &a;
-
-  construct1_via_allocator(Allocator &a)
-    : a(a)
-  {}
-
-  template<typename T>
-  inline __host__ __device__
-  void operator()(T &x)
-  {
-    allocator_traits<Allocator>::construct(a, &x);
-  }
-};
-
-
-template<typename Allocator, typename T>
-  struct needs_default_construct_via_allocator
-    : has_member_construct1<
-        Allocator,
-        T
-      >
-{};
-
-
-// we know that std::allocator::construct's only effect is to call T's 
-// default constructor, so we needn't use it for default construction
-template<typename U, typename T>
-  struct needs_default_construct_via_allocator<std::allocator<U>, T>
-    : thrust::detail::false_type
-{};
-
-
-template<typename Allocator, typename Pointer, typename Size>
-  typename enable_if<
-    needs_default_construct_via_allocator<
-      Allocator,
-      typename pointer_element<Pointer>::type
-    >::value
-  >::type
-    default_construct_range(Allocator &a, Pointer p, Size n)
-{
-  thrust::for_each_n(allocator_system<Allocator>::get(a), p, n, construct1_via_allocator<Allocator>(a));
-}
-
-
-template<typename Allocator, typename Pointer, typename Size>
-  typename disable_if<
-    needs_default_construct_via_allocator<
-      Allocator,
-      typename pointer_element<Pointer>::type
-    >::value
-  >::type
-    default_construct_range(Allocator &a, Pointer p, Size n)
-{
-  thrust::uninitialized_fill_n(allocator_system<Allocator>::get(a), p, n, typename pointer_element<Pointer>::type());
-}
-
-
-} // end allocator_traits_detail
-
-
-template<typename Allocator, typename Pointer, typename Size>
-  void default_construct_range(Allocator &a, Pointer p, Size n)
-{
-  return allocator_traits_detail::default_construct_range(a,p,n);
-}
-
-
-} // end detail
-} // end thrust
-
diff --git a/compat/thrust/detail/allocator/destroy_range.h b/compat/thrust/detail/allocator/destroy_range.h
deleted file mode 100644
index d690a60a79..0000000000
--- a/compat/thrust/detail/allocator/destroy_range.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-namespace thrust
-{
-namespace detail
-{
-
-template<typename Allocator, typename Pointer, typename Size>
-  inline void destroy_range(Allocator &a, Pointer p, Size n);
-
-} // end detail
-} // end thrust
-
-#include <thrust/detail/allocator/destroy_range.inl>
-
diff --git a/compat/thrust/detail/allocator/destroy_range.inl b/compat/thrust/detail/allocator/destroy_range.inl
deleted file mode 100644
index ace222356f..0000000000
--- a/compat/thrust/detail/allocator/destroy_range.inl
+++ /dev/null
@@ -1,158 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/allocator/destroy_range.h>
-#include <thrust/detail/allocator/allocator_traits.h>
-#include <thrust/detail/type_traits/pointer_traits.h>
-#include <thrust/for_each.h>
-#include <memory>
-
-namespace thrust
-{
-namespace detail
-{
-namespace allocator_traits_detail
-{
-
-
-// destroy_range has three cases:
-// if Allocator has an effectful member function destroy:
-//   1. destroy via the allocator
-// else
-//   2. if T has a non-trivial destructor, destroy the range without using the allocator
-//   3. if T has a trivial destructor, do a no-op
-
-template<typename Allocator, typename T>
-  struct has_effectful_member_destroy
-    : has_member_destroy<Allocator,T>
-{};
-
-// std::allocator::destroy's only effect is to invoke its argument's destructor
-template<typename U, typename T>
-  struct has_effectful_member_destroy<std::allocator<U>, T>
-    : thrust::detail::false_type
-{};
-
-// case 1: Allocator has an effectful 1-argument member function "destroy"
-template<typename Allocator, typename Pointer>
-  struct enable_if_destroy_range_case1
-    : thrust::detail::enable_if<
-        has_effectful_member_destroy<
-          Allocator,
-          typename pointer_element<Pointer>::type
-        >::value
-      >
-{};
-
-// case 2: Allocator has no member function "destroy", but T has a non-trivial destructor
-template<typename Allocator, typename Pointer>
-  struct enable_if_destroy_range_case2
-    : thrust::detail::enable_if<
-        !has_effectful_member_destroy<
-          Allocator,
-          typename pointer_element<Pointer>::type
-        >::value &&
-        !has_trivial_destructor<
-          typename pointer_element<Pointer>::type
-        >::value
-      >
-{};
-
-// case 3: Allocator has no member function "destroy", and T has a trivial destructor
-template<typename Allocator, typename Pointer>
-  struct enable_if_destroy_range_case3
-    : thrust::detail::enable_if<
-        !has_effectful_member_destroy<
-          Allocator,
-          typename pointer_element<Pointer>::type
-        >::value &&
-        has_trivial_destructor<
-          typename pointer_element<Pointer>::type
-        >::value
-      >
-{};
-
-
-
-template<typename Allocator>
-  struct destroy_via_allocator
-{
-  Allocator &a;
-
-  destroy_via_allocator(Allocator &a)
-    : a(a)
-  {}
-
-  template<typename T>
-  inline __host__ __device__
-  void operator()(T &x)
-  {
-    allocator_traits<Allocator>::destroy(a, &x);
-  }
-};
-
-
-// destroy_range case 1: destroy via allocator
-template<typename Allocator, typename Pointer, typename Size>
-  typename enable_if_destroy_range_case1<Allocator,Pointer>::type
-    destroy_range(Allocator &a, Pointer p, Size n)
-{
-  thrust::for_each_n(allocator_system<Allocator>::get(a), p, n, destroy_via_allocator<Allocator>(a));
-}
-
-
-// we must prepare for His coming
-struct gozer
-{
-  template<typename T>
-  inline __host__ __device__
-  void operator()(T &x)
-  {
-    x.~T();
-  }
-};
-
-// destroy_range case 2: destroy without the allocator
-template<typename Allocator, typename Pointer, typename Size>
-  typename enable_if_destroy_range_case2<Allocator,Pointer>::type
-    destroy_range(Allocator &a, Pointer p, Size n)
-{
-  thrust::for_each_n(allocator_system<Allocator>::get(a), p, n, gozer());
-}
-
-
-// destroy_range case 3: no-op
-template<typename Allocator, typename Pointer, typename Size>
-  typename enable_if_destroy_range_case3<Allocator,Pointer>::type
-    destroy_range(Allocator &, Pointer, Size)
-{
-  // no op
-}
-
-
-} // end allocator_traits_detail
-
-
-template<typename Allocator, typename Pointer, typename Size>
-  void destroy_range(Allocator &a, Pointer p, Size n)
-{
-  return allocator_traits_detail::destroy_range(a,p,n);
-}
-
-
-} // end detail
-} // end thrust
-
diff --git a/compat/thrust/detail/allocator/fill_construct_range.h b/compat/thrust/detail/allocator/fill_construct_range.h
deleted file mode 100644
index 66fec416c3..0000000000
--- a/compat/thrust/detail/allocator/fill_construct_range.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-namespace thrust
-{
-namespace detail
-{
-
-
-template<typename Allocator, typename Pointer, typename Size, typename T>
-inline void fill_construct_range(Allocator &a, Pointer p, Size n, const T &value);
-
-
-} // end detail
-} // end thrust
-
-#include <thrust/detail/allocator/fill_construct_range.inl>
-
diff --git a/compat/thrust/detail/allocator/fill_construct_range.inl b/compat/thrust/detail/allocator/fill_construct_range.inl
deleted file mode 100644
index e2c9c09c80..0000000000
--- a/compat/thrust/detail/allocator/fill_construct_range.inl
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/detail/allocator/allocator_traits.h>
-#include <thrust/detail/type_traits/pointer_traits.h>
-#include <thrust/for_each.h>
-#include <thrust/uninitialized_fill.h>
-#include <memory>
-
-namespace thrust
-{
-namespace detail
-{
-namespace allocator_traits_detail
-{
-
-// fill_construct_range has 2 cases:
-// if Allocator has an effectful member function construct:
-//   1. construct via the allocator
-// else
-//   2. construct via uninitialized_fill
-
-template<typename Allocator, typename T, typename Arg1>
-  struct has_effectful_member_construct2
-    : has_member_construct2<Allocator,T,Arg1>
-{};
-
-// std::allocator::construct's only effect is to invoke placement new
-template<typename U, typename T, typename Arg1>
-  struct has_effectful_member_construct2<std::allocator<U>,T,Arg1>
-    : thrust::detail::false_type
-{};
-
-
-template<typename Allocator, typename Arg1>
-  struct construct2_via_allocator
-{
-  Allocator &a;
-  Arg1 arg;
-
-  construct2_via_allocator(Allocator &a, const Arg1 &arg)
-    : a(a), arg(arg)
-  {}
-
-  template<typename T>
-  inline __host__ __device__
-  void operator()(T &x)
-  {
-    allocator_traits<Allocator>::construct(a, &x, arg);
-  }
-};
-
-
-template<typename Allocator, typename Pointer, typename Size, typename T>
-  typename enable_if<
-    has_effectful_member_construct2<
-      Allocator,
-      typename pointer_element<Pointer>::type,
-      T
-    >::value
-  >::type
-    fill_construct_range(Allocator &a, Pointer p, Size n, const T &value)
-{
-  thrust::for_each_n(allocator_system<Allocator>::get(a), p, n, construct2_via_allocator<Allocator,T>(a, value));
-}
-
-
-template<typename Allocator, typename Pointer, typename Size, typename T>
-  typename disable_if<
-    has_effectful_member_construct2<
-      Allocator,
-      typename pointer_element<Pointer>::type,
-      T
-    >::value
-  >::type
-    fill_construct_range(Allocator &a, Pointer p, Size n, const T &value)
-{
-  thrust::uninitialized_fill_n(allocator_system<Allocator>::get(a), p, n, value);
-}
-
-
-} // end allocator_traits_detail
-
-
-template<typename Alloc, typename Pointer, typename Size, typename T>
-  void fill_construct_range(Alloc &a, Pointer p, Size n, const T &value)
-{
-  return allocator_traits_detail::fill_construct_range(a,p,n,value);
-}
-
-
-} // end detail
-} // end thrust
-
diff --git a/compat/thrust/detail/allocator/malloc_allocator.h b/compat/thrust/detail/allocator/malloc_allocator.h
deleted file mode 100644
index cf4567e419..0000000000
--- a/compat/thrust/detail/allocator/malloc_allocator.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/type_traits/pointer_traits.h>
-#include <thrust/detail/allocator/tagged_allocator.h>
-
-namespace thrust
-{
-namespace detail
-{
-
-template<typename T, typename System, typename Pointer>
-  class malloc_allocator
-    : public thrust::detail::tagged_allocator<
-               T, System, Pointer
-             >
-{
-  private:
-    typedef thrust::detail::tagged_allocator<
-      T, System, Pointer
-    > super_t;
-
-  public:
-    typedef typename super_t::pointer   pointer;
-    typedef typename super_t::size_type size_type;
-
-    pointer allocate(size_type cnt);
-
-    void deallocate(pointer p, size_type n);
-};
-
-} // end detail
-} // end thrust
-
-#include <thrust/detail/allocator/malloc_allocator.inl>
-
diff --git a/compat/thrust/detail/allocator/malloc_allocator.inl b/compat/thrust/detail/allocator/malloc_allocator.inl
deleted file mode 100644
index dd70202a04..0000000000
--- a/compat/thrust/detail/allocator/malloc_allocator.inl
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/allocator/malloc_allocator.h>
-#include <thrust/system/detail/generic/select_system.h>
-#include <thrust/system/detail/bad_alloc.h>
-#include <thrust/detail/raw_pointer_cast.h>
-#include <thrust/detail/malloc_and_free.h>
-
-namespace thrust
-{
-namespace detail
-{
-
-
-template<typename T, typename System, typename Pointer>
-  typename malloc_allocator<T,System,Pointer>::pointer
-    malloc_allocator<T,System,Pointer>
-      ::allocate(typename malloc_allocator<T,System,Pointer>::size_type cnt)
-{
-  using thrust::system::detail::generic::select_system;
-
-  // XXX should use a hypothetical thrust::static_pointer_cast here
-  System system;
-
-  pointer result = thrust::malloc<T>(select_system(system), cnt);
-
-  if(result.get() == 0)
-  {
-    throw thrust::system::detail::bad_alloc("malloc_allocator::allocate: malloc failed");
-  } // end if
-
-  return result;
-} // end malloc_allocator::allocate()
-
-
-template<typename T, typename System, typename Pointer>
-  void malloc_allocator<T,System,Pointer>
-    ::deallocate(typename malloc_allocator<T,System,Pointer>::pointer p, typename malloc_allocator<T,System,Pointer>::size_type n)
-{
-  using thrust::system::detail::generic::select_system;
-
-  System system;
-  thrust::free(select_system(system), p);
-} // end malloc_allocator
-
-
-} // end detail
-} // end thrust
-
diff --git a/compat/thrust/detail/allocator/no_throw_allocator.h b/compat/thrust/detail/allocator/no_throw_allocator.h
deleted file mode 100644
index ce397dbdb5..0000000000
--- a/compat/thrust/detail/allocator/no_throw_allocator.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-namespace thrust
-{
-namespace detail
-{
-
-template<typename BaseAllocator>
-  struct no_throw_allocator : BaseAllocator
-{
-  private:
-    typedef BaseAllocator super_t;
-  
-  public:
-    inline no_throw_allocator(const BaseAllocator &other = BaseAllocator())
-      : super_t(other)
-    {}
-
-    template<typename U>
-      struct rebind
-    {
-      typedef no_throw_allocator<typename super_t::template rebind<U>::other> other;
-    }; // end rebind
-
-    void deallocate(typename super_t::pointer p, typename super_t::size_type n)
-    {
-      try
-      {
-        super_t::deallocate(p, n);
-      } // end try
-      catch(...)
-      {
-        // catch anything
-      } // end catch
-    } // end deallocate()
-
-    inline bool operator==(no_throw_allocator const &other) { return super_t::operator==(other); }
-    inline bool operator!=(no_throw_allocator const &other) { return super_t::operator!=(other); }
-}; // end no_throw_allocator
-
-} // end detail
-} // end thrust
-
-
diff --git a/compat/thrust/detail/allocator/tagged_allocator.h b/compat/thrust/detail/allocator/tagged_allocator.h
deleted file mode 100644
index 3cb87a32bf..0000000000
--- a/compat/thrust/detail/allocator/tagged_allocator.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/type_traits/pointer_traits.h>
-#include <thrust/iterator/iterator_traits.h>
-
-namespace thrust
-{
-namespace detail
-{
-
-template<typename T, typename Tag, typename Pointer> class tagged_allocator;
-
-template<typename Tag, typename Pointer>
-  class tagged_allocator<void, Tag, Pointer>
-{
-  public:
-    typedef void                                                                                 value_type;
-    typedef typename thrust::detail::pointer_traits<Pointer>::template rebind<void>::other       pointer;
-    typedef typename thrust::detail::pointer_traits<Pointer>::template rebind<const void>::other const_pointer;
-    typedef std::size_t                                                                          size_type;
-    typedef typename thrust::detail::pointer_traits<Pointer>::difference_type                    difference_type;
-    typedef Tag                                                                                  system_type;
-
-    template<typename U>
-      struct rebind
-    {
-      typedef tagged_allocator<U,Tag,Pointer> other;
-    }; // end rebind
-};
-
-template<typename T, typename Tag, typename Pointer>
-  class tagged_allocator
-{
-  public:
-    typedef T                                                                                 value_type;
-    typedef typename thrust::detail::pointer_traits<Pointer>::template rebind<T>::other       pointer;
-    typedef typename thrust::detail::pointer_traits<Pointer>::template rebind<const T>::other const_pointer;
-    typedef typename thrust::iterator_reference<pointer>::type                                reference;
-    typedef typename thrust::iterator_reference<const_pointer>::type                          const_reference;
-    typedef std::size_t                                                                       size_type;
-    typedef typename thrust::detail::pointer_traits<pointer>::difference_type                 difference_type;
-    typedef Tag                                                                               system_type;
-
-    template<typename U>
-      struct rebind
-    {
-      typedef tagged_allocator<U,Tag,Pointer> other;
-    }; // end rebind
-
-    __host__ __device__
-    inline tagged_allocator();
-
-    __host__ __device__
-    inline tagged_allocator(const tagged_allocator &);
-
-    template<typename U, typename OtherPointer>
-    __host__ __device__
-    inline tagged_allocator(const tagged_allocator<U, Tag, OtherPointer> &);
-
-    __host__ __device__
-    inline ~tagged_allocator();
-
-    __host__ __device__
-    pointer address(reference x) const;
-
-    __host__ __device__
-    const_pointer address(const_reference x) const;
-
-    size_type max_size() const;
-};
-
-template<typename T1, typename Pointer1, typename T2, typename Pointer2, typename Tag>
-__host__ __device__
-bool operator==(const tagged_allocator<T1,Pointer1,Tag> &, const tagged_allocator<T2,Pointer2,Tag> &);
-
-template<typename T1, typename Pointer1, typename T2, typename Pointer2, typename Tag>
-__host__ __device__
-bool operator!=(const tagged_allocator<T1,Pointer1,Tag> &, const tagged_allocator<T2,Pointer2,Tag> &);
-
-} // end detail
-} // end thrust
-
-#include <thrust/detail/allocator/tagged_allocator.inl>
-
diff --git a/compat/thrust/detail/allocator/tagged_allocator.inl b/compat/thrust/detail/allocator/tagged_allocator.inl
deleted file mode 100644
index cb362a840d..0000000000
--- a/compat/thrust/detail/allocator/tagged_allocator.inl
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/allocator/tagged_allocator.h>
-#include <limits>
-
-namespace thrust
-{
-namespace detail
-{
-
-
-template<typename T, typename Tag, typename Pointer>
-  tagged_allocator<T,Tag,Pointer>
-    ::tagged_allocator()
-{}
-
-
-template<typename T, typename Tag, typename Pointer>
-  tagged_allocator<T,Tag,Pointer>
-    ::tagged_allocator(const tagged_allocator<T,Tag,Pointer> &)
-{}
-
-
-template<typename T, typename Tag, typename Pointer>
-  template<typename U, typename OtherPointer>
-    tagged_allocator<T,Tag,Pointer>
-      ::tagged_allocator(const tagged_allocator<U,Tag,OtherPointer> &)
-{}
-
-
-template<typename T, typename Tag, typename Pointer>
-  tagged_allocator<T,Tag,Pointer>
-    ::~tagged_allocator()
-{}
-
-
-template<typename T, typename Tag, typename Pointer>
-  typename tagged_allocator<T,Tag,Pointer>::pointer
-    tagged_allocator<T,Tag,Pointer>
-      ::address(reference x) const
-{
-  return &x;
-}
-
-
-template<typename T, typename Tag, typename Pointer>
-  typename tagged_allocator<T,Tag,Pointer>::const_pointer
-    tagged_allocator<T,Tag,Pointer>
-      ::address(const_reference x) const
-{
-  return &x;
-}
-
-
-template<typename T, typename Tag, typename Pointer>
-  typename tagged_allocator<T,Tag,Pointer>::size_type
-    tagged_allocator<T,Tag,Pointer>
-      ::max_size() const
-{
-  return (std::numeric_limits<size_type>::max)() / sizeof(T);
-}
-
-
-template<typename T1, typename Pointer1, typename T2, typename Pointer2, typename Tag>
-__host__ __device__
-bool operator==(const tagged_allocator<T1,Pointer1,Tag> &, const tagged_allocator<T2,Pointer2,Tag> &)
-{
-  return true;
-}
-
-
-template<typename T1, typename Pointer1, typename T2, typename Pointer2, typename Tag>
-__host__ __device__
-bool operator!=(const tagged_allocator<T1,Pointer1,Tag> &, const tagged_allocator<T2,Pointer2,Tag> &)
-{
-  return false;
-}
-    
-
-} // end detail
-} // end thrust
-
diff --git a/compat/thrust/detail/allocator/temporary_allocator.h b/compat/thrust/detail/allocator/temporary_allocator.h
deleted file mode 100644
index f0496f9fe1..0000000000
--- a/compat/thrust/detail/allocator/temporary_allocator.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/allocator/tagged_allocator.h>
-#include <thrust/detail/allocator/allocator_traits.h>
-#include <thrust/pair.h>
-#include <thrust/memory.h>
-#include <thrust/detail/execution_policy.h>
-
-namespace thrust
-{
-namespace detail
-{
-
-
-// XXX the pointer parameter given to tagged_allocator should be related to
-//     the type of the expression get_temporary_buffer(system, n).first
-//     without decltype, compromise on pointer<T,System>
-template<typename T, typename System>
-  class temporary_allocator
-    : public thrust::detail::tagged_allocator<
-               T, System, thrust::pointer<T,System>
-             >
-{
-  private:
-    typedef thrust::detail::tagged_allocator<
-      T, System, thrust::pointer<T,System>
-    > super_t;
-
-    System &m_system;
-
-  public:
-    typedef typename super_t::pointer   pointer;
-    typedef typename super_t::size_type size_type;
-
-    inline explicit temporary_allocator(thrust::execution_policy<System> &system) :
-      super_t(),
-      m_system(thrust::detail::derived_cast(system))
-    {}
-
-    pointer allocate(size_type cnt);
-
-    void deallocate(pointer p, size_type n);
-
-    inline System &system()
-    {
-      return m_system;
-    } // end system()
-
-  private:
-    typedef thrust::pair<pointer, size_type> pointer_and_size;
-}; // end temporary_allocator
-
-
-} // end detail
-} // end thrust
-
-#include <thrust/detail/allocator/temporary_allocator.inl>
-
diff --git a/compat/thrust/detail/allocator/temporary_allocator.inl b/compat/thrust/detail/allocator/temporary_allocator.inl
deleted file mode 100644
index 63221d57e9..0000000000
--- a/compat/thrust/detail/allocator/temporary_allocator.inl
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/allocator/temporary_allocator.h>
-#include <thrust/detail/temporary_buffer.h>
-#include <thrust/system/detail/bad_alloc.h>
-
-namespace thrust
-{
-namespace detail
-{
-
-
-template<typename T, typename System>
-  typename temporary_allocator<T,System>::pointer
-    temporary_allocator<T,System>
-      ::allocate(typename temporary_allocator<T,System>::size_type cnt)
-{
-  pointer_and_size result = thrust::get_temporary_buffer<T>(system(), cnt);
-
-  // handle failure
-  if(result.second < cnt)
-  {
-    // deallocate and throw
-    // note that we pass cnt to deallocate, not a value derived from result.second
-    deallocate(result.first, cnt);
-
-    throw thrust::system::detail::bad_alloc("temporary_buffer::allocate: get_temporary_buffer failed");
-  } // end if
-
-  return result.first;
-} // end temporary_allocator::allocate()
-
-
-template<typename T, typename System>
-  void temporary_allocator<T,System>
-    ::deallocate(typename temporary_allocator<T,System>::pointer p, typename temporary_allocator<T,System>::size_type n)
-{
-  return thrust::return_temporary_buffer(system(), p);
-} // end temporary_allocator
-
-
-} // end detail
-} // end thrust
-
diff --git a/compat/thrust/detail/binary_search.inl b/compat/thrust/detail/binary_search.inl
deleted file mode 100644
index 0fd799a311..0000000000
--- a/compat/thrust/detail/binary_search.inl
+++ /dev/null
@@ -1,458 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file binary_search.inl
- *  \brief Inline file for binary_search.h.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/binary_search.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/system/detail/generic/select_system.h>
-#include <thrust/system/detail/generic/binary_search.h>
-#include <thrust/system/detail/adl/binary_search.h>
-
-namespace thrust
-{
-
-
-template <typename DerivedPolicy, typename ForwardIterator, typename LessThanComparable>
-ForwardIterator lower_bound(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                            ForwardIterator first,
-                            ForwardIterator last,
-                            const LessThanComparable &value)
-{
-    using thrust::system::detail::generic::lower_bound;
-    return lower_bound(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, value);
-}
-
-
-template<typename DerivedPolicy, typename ForwardIterator, typename T, typename StrictWeakOrdering>
-ForwardIterator lower_bound(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                            ForwardIterator first,
-                            ForwardIterator last,
-                            const T &value,
-                            StrictWeakOrdering comp)
-{
-    using thrust::system::detail::generic::lower_bound;
-    return lower_bound(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, value, comp);
-}
-
-
-template<typename DerivedPolicy, typename ForwardIterator, typename LessThanComparable>
-ForwardIterator upper_bound(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                            ForwardIterator first,
-                            ForwardIterator last,
-                            const LessThanComparable &value)
-{
-    using thrust::system::detail::generic::upper_bound;
-    return upper_bound(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, value);
-}
-
-
-template<typename DerivedPolicy, typename ForwardIterator, typename T, typename StrictWeakOrdering>
-ForwardIterator upper_bound(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                            ForwardIterator first,
-                            ForwardIterator last,
-                            const T &value,
-                            StrictWeakOrdering comp)
-{
-    using thrust::system::detail::generic::upper_bound;
-    return upper_bound(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, value, comp);
-}
-
-
-template <typename DerivedPolicy, typename ForwardIterator, typename LessThanComparable>
-bool binary_search(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                   ForwardIterator first, 
-                   ForwardIterator last,
-                   const LessThanComparable& value)
-{
-    using thrust::system::detail::generic::binary_search;
-    return binary_search(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, value);
-}
-
-
-template <typename DerivedPolicy, typename ForwardIterator, typename T, typename StrictWeakOrdering>
-bool binary_search(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                   ForwardIterator first,
-                   ForwardIterator last,
-                   const T& value, 
-                   StrictWeakOrdering comp)
-{
-    using thrust::system::detail::generic::binary_search;
-    return binary_search(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, value, comp);
-}
-
-
-template <typename DerivedPolicy, typename ForwardIterator, typename T, typename StrictWeakOrdering>
-thrust::pair<ForwardIterator, ForwardIterator>
-equal_range(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-            ForwardIterator first,
-            ForwardIterator last,
-            const T& value,
-            StrictWeakOrdering comp)
-{
-    using thrust::system::detail::generic::equal_range;
-    return equal_range(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, value, comp);
-}
-
-
-template <typename DerivedPolicy, typename ForwardIterator, typename LessThanComparable>
-thrust::pair<ForwardIterator, ForwardIterator>
-equal_range(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-            ForwardIterator first,
-            ForwardIterator last,
-            const LessThanComparable& value)
-{
-    using thrust::system::detail::generic::equal_range;
-    return equal_range(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, value);
-}
-
-
-template <typename DerivedPolicy, typename ForwardIterator, typename InputIterator, typename OutputIterator>
-OutputIterator lower_bound(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                           ForwardIterator first, 
-                           ForwardIterator last,
-                           InputIterator values_first, 
-                           InputIterator values_last,
-                           OutputIterator output)
-{
-    using thrust::system::detail::generic::lower_bound;
-    return lower_bound(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, values_first, values_last, output);
-}
-
-
-template <typename DerivedPolicy, typename ForwardIterator, typename InputIterator, typename OutputIterator, typename StrictWeakOrdering>
-OutputIterator lower_bound(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                           ForwardIterator first, 
-                           ForwardIterator last,
-                           InputIterator values_first, 
-                           InputIterator values_last,
-                           OutputIterator output,
-                           StrictWeakOrdering comp)
-{
-    using thrust::system::detail::generic::lower_bound;
-    return lower_bound(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, values_first, values_last, output, comp);
-}
-
-
-template <typename DerivedPolicy, typename ForwardIterator, typename InputIterator, typename OutputIterator>
-OutputIterator upper_bound(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                           ForwardIterator first, 
-                           ForwardIterator last,
-                           InputIterator values_first, 
-                           InputIterator values_last,
-                           OutputIterator output)
-{
-    using thrust::system::detail::generic::upper_bound;
-    return upper_bound(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, values_first, values_last, output);
-}
-
-
-template <typename DerivedPolicy, typename ForwardIterator, typename InputIterator, typename OutputIterator, typename StrictWeakOrdering>
-OutputIterator upper_bound(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                           ForwardIterator first, 
-                           ForwardIterator last,
-                           InputIterator values_first, 
-                           InputIterator values_last,
-                           OutputIterator output,
-                           StrictWeakOrdering comp)
-{
-    using thrust::system::detail::generic::upper_bound;
-    return upper_bound(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, values_first, values_last, output, comp);
-}
-
-
-template <typename DerivedPolicy, typename ForwardIterator, typename InputIterator, typename OutputIterator>
-OutputIterator binary_search(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                             ForwardIterator first, 
-                             ForwardIterator last,
-                             InputIterator values_first, 
-                             InputIterator values_last,
-                             OutputIterator output)
-{
-    using thrust::system::detail::generic::binary_search;
-    return binary_search(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, values_first, values_last, output);
-}
-
-
-template <typename DerivedPolicy, typename ForwardIterator, typename InputIterator, typename OutputIterator, typename StrictWeakOrdering>
-OutputIterator binary_search(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                             ForwardIterator first, 
-                             ForwardIterator last,
-                             InputIterator values_first, 
-                             InputIterator values_last,
-                             OutputIterator output,
-                             StrictWeakOrdering comp)
-{
-    using thrust::system::detail::generic::binary_search;
-    return binary_search(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, values_first, values_last, output, comp);
-}
-
-
-//////////////////////
-// Scalar Functions //
-//////////////////////
-
-template <typename ForwardIterator, typename LessThanComparable>
-ForwardIterator lower_bound(ForwardIterator first, 
-                            ForwardIterator last,
-                            const LessThanComparable& value)
-{
-    using thrust::system::detail::generic::select_system;
-
-    typedef typename thrust::iterator_system<ForwardIterator>::type System; 
-
-    System system;
-
-    return thrust::lower_bound(select_system(system), first, last, value);
-}
-
-template <typename ForwardIterator, typename T, typename StrictWeakOrdering>
-ForwardIterator lower_bound(ForwardIterator first,
-                            ForwardIterator last,
-                            const T& value, 
-                            StrictWeakOrdering comp)
-{
-    using thrust::system::detail::generic::select_system;
-
-    typedef typename thrust::iterator_system<ForwardIterator>::type System; 
-
-    System system;
-
-    return thrust::lower_bound(select_system(system), first, last, value, comp);
-}
-
-template <typename ForwardIterator, typename LessThanComparable>
-ForwardIterator upper_bound(ForwardIterator first, 
-                            ForwardIterator last,
-                            const LessThanComparable& value)
-{
-    using thrust::system::detail::generic::select_system;
-
-    typedef typename thrust::iterator_system<ForwardIterator>::type System;
-
-    System system;
-
-    return thrust::upper_bound(select_system(system), first, last, value);
-}
-
-template <typename ForwardIterator, typename T, typename StrictWeakOrdering>
-ForwardIterator upper_bound(ForwardIterator first,
-                            ForwardIterator last,
-                            const T& value, 
-                            StrictWeakOrdering comp)
-{
-    using thrust::system::detail::generic::select_system;
-
-    typedef typename thrust::iterator_system<ForwardIterator>::type System;
-
-    System system;
-
-    return thrust::upper_bound(select_system(system), first, last, value, comp);
-}
-
-template <typename ForwardIterator, typename LessThanComparable>
-bool binary_search(ForwardIterator first, 
-                   ForwardIterator last,
-                   const LessThanComparable& value)
-{
-    using thrust::system::detail::generic::select_system;
-
-    typedef typename thrust::iterator_system<ForwardIterator>::type System;
-
-    System system;
-
-    return thrust::binary_search(select_system(system), first, last, value);
-}
-
-template <typename ForwardIterator, typename T, typename StrictWeakOrdering>
-bool binary_search(ForwardIterator first,
-                   ForwardIterator last,
-                   const T& value, 
-                   StrictWeakOrdering comp)
-{
-    using thrust::system::detail::generic::select_system;
-
-    typedef typename thrust::iterator_system<ForwardIterator>::type System;
-
-    System system;
-
-    return thrust::binary_search(select_system(system), first, last, value, comp);
-}
-
-template <typename ForwardIterator, typename LessThanComparable>
-thrust::pair<ForwardIterator, ForwardIterator>
-equal_range(ForwardIterator first,
-            ForwardIterator last,
-            const LessThanComparable& value)
-{
-    using thrust::system::detail::generic::select_system;
-
-    typedef typename thrust::iterator_system<ForwardIterator>::type System;
-
-    System system;
-
-    return thrust::equal_range(select_system(system), first, last, value);
-}
-
-template <typename ForwardIterator, typename T, typename StrictWeakOrdering>
-thrust::pair<ForwardIterator, ForwardIterator>
-equal_range(ForwardIterator first,
-            ForwardIterator last,
-            const T& value,
-            StrictWeakOrdering comp)
-{
-    using thrust::system::detail::generic::select_system;
-
-    typedef typename thrust::iterator_system<ForwardIterator>::type System;
-
-    System system;
-
-    return thrust::equal_range(select_system(system), first, last, value, comp);
-}
-
-//////////////////////
-// Vector Functions //
-//////////////////////
-
-template <typename ForwardIterator, typename InputIterator, typename OutputIterator>
-OutputIterator lower_bound(ForwardIterator first, 
-                           ForwardIterator last,
-                           InputIterator values_first, 
-                           InputIterator values_last,
-                           OutputIterator output)
-{
-    using thrust::system::detail::generic::select_system;
-
-    typedef typename thrust::iterator_system<ForwardIterator>::type System1;
-    typedef typename thrust::iterator_system<InputIterator>::type   System2;
-    typedef typename thrust::iterator_system<OutputIterator>::type  System3;
-
-    System1 system1;
-    System2 system2;
-    System3 system3;
-
-    return thrust::lower_bound(select_system(system1,system2,system3), first, last, values_first, values_last, output);
-}
-
-template <typename ForwardIterator, typename InputIterator, typename OutputIterator, typename StrictWeakOrdering>
-OutputIterator lower_bound(ForwardIterator first, 
-                           ForwardIterator last,
-                           InputIterator values_first, 
-                           InputIterator values_last,
-                           OutputIterator output,
-                           StrictWeakOrdering comp)
-{
-    using thrust::system::detail::generic::select_system;
-
-    typedef typename thrust::iterator_system<ForwardIterator>::type System1;
-    typedef typename thrust::iterator_system<InputIterator>::type   System2;
-    typedef typename thrust::iterator_system<OutputIterator>::type  System3;
-
-    System1 system1;
-    System2 system2;
-    System3 system3;
-
-    return thrust::lower_bound(select_system(system1,system2,system3), first, last, values_first, values_last, output, comp);
-}
-    
-template <typename ForwardIterator, typename InputIterator, typename OutputIterator>
-OutputIterator upper_bound(ForwardIterator first, 
-                           ForwardIterator last,
-                           InputIterator values_first, 
-                           InputIterator values_last,
-                           OutputIterator output)
-{
-    using thrust::system::detail::generic::select_system;
-
-    typedef typename thrust::iterator_system<ForwardIterator>::type System1;
-    typedef typename thrust::iterator_system<InputIterator>::type   System2;
-    typedef typename thrust::iterator_system<OutputIterator>::type  System3;
-
-    System1 system1;
-    System2 system2;
-    System3 system3;
-
-    return thrust::upper_bound(select_system(system1,system2,system3), first, last, values_first, values_last, output);
-}
-
-template <typename ForwardIterator, typename InputIterator, typename OutputIterator, typename StrictWeakOrdering>
-OutputIterator upper_bound(ForwardIterator first, 
-                           ForwardIterator last,
-                           InputIterator values_first, 
-                           InputIterator values_last,
-                           OutputIterator output,
-                           StrictWeakOrdering comp)
-{
-    using thrust::system::detail::generic::select_system;
-
-    typedef typename thrust::iterator_system<ForwardIterator>::type System1;
-    typedef typename thrust::iterator_system<InputIterator>::type   System2;
-    typedef typename thrust::iterator_system<OutputIterator>::type  System3;
-
-    System1 system1;
-    System2 system2;
-    System3 system3;
-
-    return thrust::upper_bound(select_system(system1,system2,system3), first, last, values_first, values_last, output, comp);
-}
-
-template <typename ForwardIterator, typename InputIterator, typename OutputIterator>
-OutputIterator binary_search(ForwardIterator first, 
-                             ForwardIterator last,
-                             InputIterator values_first, 
-                             InputIterator values_last,
-                             OutputIterator output)
-{
-    using thrust::system::detail::generic::select_system;
-
-    typedef typename thrust::iterator_system<ForwardIterator>::type System1;
-    typedef typename thrust::iterator_system<InputIterator>::type   System2;
-    typedef typename thrust::iterator_system<OutputIterator>::type  System3;
-
-    System1 system1;
-    System2 system2;
-    System3 system3;
-
-    return thrust::binary_search(select_system(system1,system2,system3), first, last, values_first, values_last, output);
-}
-
-template <typename ForwardIterator, typename InputIterator, typename OutputIterator, typename StrictWeakOrdering>
-OutputIterator binary_search(ForwardIterator first, 
-                             ForwardIterator last,
-                             InputIterator values_first, 
-                             InputIterator values_last,
-                             OutputIterator output,
-                             StrictWeakOrdering comp)
-{
-    using thrust::system::detail::generic::select_system;
-
-    typedef typename thrust::iterator_system<ForwardIterator>::type System1;
-    typedef typename thrust::iterator_system<InputIterator>::type   System2;
-    typedef typename thrust::iterator_system<OutputIterator>::type  System3;
-
-    System1 system1;
-    System2 system2;
-    System3 system3;
-
-    return thrust::binary_search(select_system(system1,system2,system3), first, last, values_first, values_last, output, comp);
-}
-
-} // end namespace thrust
-
diff --git a/compat/thrust/detail/config.h b/compat/thrust/detail/config.h
deleted file mode 100644
index d6b6691089..0000000000
--- a/compat/thrust/detail/config.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-/*! \file config.h
- *  \brief Defines platform configuration.
- */
-
-#pragma once
-
-#include <thrust/detail/config/config.h>
-
diff --git a/compat/thrust/detail/config/compiler.h b/compat/thrust/detail/config/compiler.h
deleted file mode 100644
index 90ce911155..0000000000
--- a/compat/thrust/detail/config/compiler.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-/*! \file compiler.h
- *  \brief Compiler-specific configuration
- */
-
-#pragma once
-
-#ifdef __CUDACC__
-
-#include <cuda.h>
-
-// Thrust supports CUDA >= 3.0
-#if CUDA_VERSION < 3000
-#error "CUDA v3.0 or newer is required"
-#endif // CUDA_VERSION
-
-#endif // __CUDACC__
-
-// enumerate host compilers we know about
-#define THRUST_HOST_COMPILER_UNKNOWN 0
-#define THRUST_HOST_COMPILER_MSVC    1
-#define THRUST_HOST_COMPILER_GCC     2
-
-// enumerate host compilers we know about
-#define THRUST_DEVICE_COMPILER_UNKNOWN 0
-#define THRUST_DEVICE_COMPILER_MSVC    1
-#define THRUST_DEVICE_COMPILER_GCC     2
-#define THRUST_DEVICE_COMPILER_NVCC    3
-
-// figure out which host compiler we're using
-// XXX we should move the definition of THRUST_DEPRECATED out of this logic
-#if   defined(_MSC_VER)
-#define THRUST_HOST_COMPILER THRUST_HOST_COMPILER_MSVC
-#define THRUST_DEPRECATED __declspec(deprecated)
-#elif defined(__GNUC__)
-#define THRUST_HOST_COMPILER THRUST_HOST_COMPILER_GCC
-#define THRUST_DEPRECATED __attribute__ ((deprecated)) 
-#define THRUST_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
-#else
-#define THRUST_HOST_COMPILER THRUST_HOST_COMPILER_UNKNOWN
-#define THRUST_DEPRECATED
-#endif // THRUST_HOST_COMPILER
-
-// figure out which device compiler we're using
-#if defined(__CUDACC__)
-#define THRUST_DEVICE_COMPILER THRUST_DEVICE_COMPILER_NVCC
-#elif THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC
-#define THRUST_DEVICE_COMPILER THRUST_DEVICE_COMPILER_MSVC
-#elif THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_GCC
-#define THRUST_DEVICE_COMPILER THRUST_DEVICE_COMPILER_GCC
-#else
-#define THRUST_DEVICE_COMPILER THRUST_DEVICE_COMPILER_UNKNOWN
-#endif
-
-// is the device compiler capable of compiling omp?
-#ifdef _OPENMP
-#define THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE THRUST_TRUE
-#else
-#define THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE THRUST_FALSE
-#endif // _OPENMP
-
-// disable specific MSVC warnings
-#if (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC) && !defined(__CUDA_ARCH__)
-#define __THRUST_DISABLE_MSVC_WARNING_BEGIN(x) \
-__pragma(warning(push)) \
-__pragma(warning(disable : x))
-#define __THRUST_DISABLE_MSVC_WARNING_END(x) \
-__pragma(warning(pop))
-#else
-#define __THRUST_DISABLE_MSVC_WARNING_BEGIN(x)
-#define __THRUST_DISABLE_MSVC_WARNING_END(x)
-#endif
-#define __THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING(x) \
-__THRUST_DISABLE_MSVC_WARNING_BEGIN(4244 4267) \
-x;\
-__THRUST_DISABLE_MSVC_WARNING_END(4244 4267)
-#define __THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_BEGIN \
-__THRUST_DISABLE_MSVC_WARNING_BEGIN(4244 4267)
-#define __THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_END \
-__THRUST_DISABLE_MSVC_WARNING_END(4244 4267)
-#define __THRUST_DISABLE_MSVC_FORCING_VALUE_TO_BOOL(x) \
-__THRUST_DISABLE_MSVC_WARNING_BEGIN(4800) \
-x;\
-__THRUST_DISABLE_MSVC_WARNING_END(4800)
-#define __THRUST_DISABLE_MSVC_FORCING_VALUE_TO_BOOL_BEGIN \
-__THRUST_DISABLE_MSVC_WARNING_BEGIN(4800)
-#define __THRUST_DISABLE_MSVC_FORCING_VALUE_TO_BOOL_END \
-__THRUST_DISABLE_MSVC_WARNING_END(4800)
diff --git a/compat/thrust/detail/config/compiler_fence.h b/compat/thrust/detail/config/compiler_fence.h
deleted file mode 100644
index f5cbf98204..0000000000
--- a/compat/thrust/detail/config/compiler_fence.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// msvc case
-#if THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC
-
-#ifndef _DEBUG
-
-#include <intrin.h>
-#pragma intrinsic(_ReadWriteBarrier)
-#define __thrust_compiler_fence() _ReadWriteBarrier()
-#else
-
-#define __thrust_compiler_fence() do {} while (0)
-
-#endif // _DEBUG
-
-// gcc case
-#elif THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_GCC
-
-#if THRUST_GCC_VERSION >= 40200 // atomic built-ins were introduced ~4.2
-#define __thrust_compiler_fence() __sync_synchronize()
-#else
-// allow the code to compile without any guarantees
-#define __thrust_compiler_fence() do {} while (0)
-#endif // THRUST_GCC_VERSION
-
-// unknown case
-#elif THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_UNKNOWN
-
-// allow the code to compile without any guarantees
-#define __thrust_compiler_fence() do {} while (0)
-
-#endif
-
diff --git a/compat/thrust/detail/config/config.h b/compat/thrust/detail/config/config.h
deleted file mode 100644
index f3498acd14..0000000000
--- a/compat/thrust/detail/config/config.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-/*! \file config.h
- *  \brief Defines platform configuration.
- */
-
-#pragma once
-
-// XXX the order of these #includes matters
-
-#include <thrust/detail/config/simple_defines.h>
-#include <thrust/detail/config/compiler.h>
-// host_system.h & device_system.h must be #included as early as possible
-// because other config headers depend on it
-#include <thrust/detail/config/host_system.h>
-#include <thrust/detail/config/device_system.h>
-#include <thrust/detail/config/host_device.h>
-#include <thrust/detail/config/debug.h>
-#include <thrust/detail/config/compiler_fence.h>
-#include <thrust/detail/config/forceinline.h>
-#include <thrust/detail/config/hd_warning_disable.h>
-
diff --git a/compat/thrust/detail/config/debug.h b/compat/thrust/detail/config/debug.h
deleted file mode 100644
index 56c1bad207..0000000000
--- a/compat/thrust/detail/config/debug.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#ifndef THRUST_DEBUG
-#  ifndef NDEBUG
-#    if (DEBUG || _DEBUG)
-#      define THRUST_DEBUG 1
-#    endif // (DEBUG || _DEBUG)
-#  endif // NDEBUG
-#endif // THRUST_DEBUG
-
-#if THRUST_DEBUG
-#  ifndef __THRUST_SYNCHRONOUS
-#    define __THRUST_SYNCHRONOUS 1
-#  endif // __THRUST_SYNCHRONOUS
-#endif // THRUST_DEBUG
-
diff --git a/compat/thrust/detail/config/device_system.h b/compat/thrust/detail/config/device_system.h
deleted file mode 100644
index a104906753..0000000000
--- a/compat/thrust/detail/config/device_system.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-// reserve 0 for undefined
-#define THRUST_DEVICE_SYSTEM_CUDA    1
-#define THRUST_DEVICE_SYSTEM_OMP     2
-#define THRUST_DEVICE_SYSTEM_TBB     3
-#define THRUST_DEVICE_SYSTEM_CPP     4
-
-#ifndef THRUST_DEVICE_SYSTEM
-#define THRUST_DEVICE_SYSTEM THRUST_DEVICE_SYSTEM_CUDA
-#endif // THRUST_DEVICE_SYSTEM
-
-// XXX make the use of THRUST_DEVICE_BACKEND an error in Thrust 1.7
-// XXX eliminate the following in Thrust 1.7
-
-#define THRUST_DEVICE_BACKEND_CUDA THRUST_DEVICE_SYSTEM_CUDA
-#define THRUST_DEVICE_BACKEND_OMP  THRUST_DEVICE_SYSTEM_OMP
-#define THRUST_DEVICE_BACKEND_TBB  THRUST_DEVICE_SYSTEM_TBB
-
-#ifdef THRUST_DEVICE_BACKEND
-#  if THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC
-#    pragma message("----------------------------------------------------------------------------------")
-#    pragma message("| WARNING: THRUST_DEVICE_BACKEND is deprecated; use THRUST_DEVICE_SYSTEM instead |")
-#    pragma message("----------------------------------------------------------------------------------")
-#  else
-#    warning ----------------------------------------------------------------------------------
-#    warning | WARNING: THRUST_DEVICE_BACKEND is deprecated; use THRUST_DEVICE_SYSTEM instead |
-#    warning ----------------------------------------------------------------------------------
-#  endif // THRUST_HOST_COMPILER
-#  undef THRUST_DEVICE_SYSTEM
-#  define THRUST_DEVICE_SYSTEM THRUST_DEVICE_BACKEND
-#endif // THRUST_DEVICE_BACKEND
-
-#if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA
-#define __THRUST_DEVICE_SYSTEM_NAMESPACE cuda
-#elif THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_OMP
-#define __THRUST_DEVICE_SYSTEM_NAMESPACE omp
-#elif THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_TBB
-#define __THRUST_DEVICE_SYSTEM_NAMESPACE tbb
-#elif THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CPP
-#define __THRUST_DEVICE_SYSTEM_NAMESPACE cpp
-#endif
-
-#define __THRUST_DEVICE_SYSTEM_ROOT thrust/system/__THRUST_DEVICE_SYSTEM_NAMESPACE
-
diff --git a/compat/thrust/detail/config/forceinline.h b/compat/thrust/detail/config/forceinline.h
deleted file mode 100644
index 620769b999..0000000000
--- a/compat/thrust/detail/config/forceinline.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-/*! \file forceinline.h
- *  \brief Defines __thrust_forceinline__
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-#if defined(__CUDACC__)
-
-#define __thrust_forceinline__ __forceinline__
-
-#else
-
-// TODO add 
-
-#define __thrust_forceinline__
-
-#endif
-
diff --git a/compat/thrust/detail/config/hd_warning_disable.h b/compat/thrust/detail/config/hd_warning_disable.h
deleted file mode 100644
index b993ef2828..0000000000
--- a/compat/thrust/detail/config/hd_warning_disable.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-/*! \file hd_warning_disable.h
- *  \brief Defines __thrust_hd_warning_disable__
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-#if defined(__CUDACC__)
-
-#define __thrust_hd_warning_disable__ \
-#pragma hd_warning_disable
-#else
-
-#define __thrust_hd_warning_disable__
-
-#endif
-
-
diff --git a/compat/thrust/detail/config/host_device.h b/compat/thrust/detail/config/host_device.h
deleted file mode 100644
index 5d0975d106..0000000000
--- a/compat/thrust/detail/config/host_device.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-/*! \file host_device.h
- *  \brief Defines __host__ and __device__ and other CUDA-isms
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-#if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA
-
-#include <host_defines.h>
-
-#else
-
-// since __host__ & __device__ might have already be defined, only
-// #define them if not defined already
-// XXX this will break if the client does #include <host_defines.h> later
-
-#ifndef __host__
-#define __host__
-#endif // __host__
-
-#ifndef __device__
-#define __device__
-#endif // __device__
-
-#endif
-
diff --git a/compat/thrust/detail/config/host_system.h b/compat/thrust/detail/config/host_system.h
deleted file mode 100644
index fb8edabc55..0000000000
--- a/compat/thrust/detail/config/host_system.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-// reserve 0 for undefined
-#define THRUST_HOST_SYSTEM_CPP    1
-#define THRUST_HOST_SYSTEM_OMP    2
-#define THRUST_HOST_SYSTEM_TBB    3
-
-#ifndef THRUST_HOST_SYSTEM
-#define THRUST_HOST_SYSTEM THRUST_HOST_SYSTEM_CPP
-#endif // THRUST_HOST_SYSTEM
-
-// XXX make the use of THRUST_HOST_BACKEND an error in Thrust 1.7
-// XXX eliminate the following in Thrust 1.7
-
-#define THRUST_HOST_BACKEND_CPP THRUST_HOST_SYSTEM_CPP
-#define THRUST_HOST_BACKEND_OMP THRUST_HOST_SYSTEM_OMP
-#define THRUST_HOST_BACKEND_TBB THRUST_HOST_SYSTEM_TBB
-
-#ifdef THRUST_HOST_BACKEND
-#  if THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC
-#    pragma message("------------------------------------------------------------------------------")
-#    pragma message("| WARNING: THRUST_HOST_BACKEND is deprecated; use THRUST_HOST_SYSTEM instead |")
-#    pragma message("------------------------------------------------------------------------------")
-#  else
-#    warning ------------------------------------------------------------------------------
-#    warning | WARNING: THRUST_HOST_BACKEND is deprecated; use THRUST_HOST_SYSTEM instead |
-#    warning ------------------------------------------------------------------------------
-#  endif // THRUST_HOST_COMPILER
-#  undef THRUST_HOST_SYSTEM
-#  define THRUST_HOST_SYSTEM THRUST_HOST_BACKEND
-#endif // THRUST_HOST_BACKEND
-
-#if THRUST_HOST_SYSTEM == THRUST_HOST_SYSTEM_CPP
-#define __THRUST_HOST_SYSTEM_NAMESPACE cpp
-#elif THRUST_HOST_SYSTEM == THRUST_HOST_SYSTEM_OMP
-#define __THRUST_HOST_SYSTEM_NAMESPACE omp
-#elif THRUST_HOST_SYSTEM == THRUST_HOST_SYSTEM_TBB
-#define __THRUST_HOST_SYSTEM_NAMESPACE tbb
-#endif
-
-#define __THRUST_HOST_SYSTEM_ROOT thrust/system/__THRUST_HOST_SYSTEM_NAMESPACE
-
diff --git a/compat/thrust/detail/config/simple_defines.h b/compat/thrust/detail/config/simple_defines.h
deleted file mode 100644
index f9510ee9ce..0000000000
--- a/compat/thrust/detail/config/simple_defines.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-/*! \file simple_defines.h
- *  \brief Primitive macros without dependencies.
- */
-
-#pragma once
-
-#define THRUST_UNKNOWN 0
-#define THRUST_FALSE   0
-#define THRUST_TRUE    1
-
-#define THRUST_PREVENT_MACRO_SUBSTITUTION
-
diff --git a/compat/thrust/detail/contiguous_storage.h b/compat/thrust/detail/contiguous_storage.h
deleted file mode 100644
index fe72bce2d2..0000000000
--- a/compat/thrust/detail/contiguous_storage.h
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/iterator/detail/normal_iterator.h>
-#include <thrust/detail/execution_policy.h>
-#include <thrust/detail/allocator/allocator_traits.h>
-
-namespace thrust
-{
-
-namespace detail
-{
-
-// XXX parameter T is redundant with parameter Alloc
-template<typename T, typename Alloc>
-  class contiguous_storage
-{
-  private:
-    typedef thrust::detail::allocator_traits<Alloc> alloc_traits;
-
-  public:
-    typedef Alloc                                      allocator_type;
-    typedef T                                          value_type;
-    typedef typename alloc_traits::pointer             pointer;
-    typedef typename alloc_traits::const_pointer       const_pointer;
-    typedef typename alloc_traits::size_type           size_type;
-    typedef typename alloc_traits::difference_type     difference_type;
-
-    // XXX we should bring reference & const_reference into allocator_traits
-    //     at the moment, it's unclear how -- we have nothing analogous to
-    //     rebind_pointer for references
-    //     we either need to add reference_traits or extend the existing
-    //     pointer_traits to support wrapped references
-    typedef typename Alloc::reference                  reference;
-    typedef typename Alloc::const_reference            const_reference;
-
-    typedef thrust::detail::normal_iterator<pointer>       iterator;
-    typedef thrust::detail::normal_iterator<const_pointer> const_iterator;
-
-    explicit contiguous_storage(const allocator_type &alloc = allocator_type());
-
-    explicit contiguous_storage(size_type n, const allocator_type &alloc = allocator_type());
-
-    ~contiguous_storage(void);
-
-    size_type size(void) const;
-
-    size_type max_size(void) const;
-
-    iterator begin(void);
-    
-    const_iterator begin(void) const;
-
-    iterator end(void);
-
-    const_iterator end(void) const;
-
-    reference operator[](size_type n);
-
-    const_reference operator[](size_type n) const;
-
-    allocator_type get_allocator(void) const;
-
-    // note that allocate does *not* automatically call deallocate
-    void allocate(size_type n);
-
-    void deallocate(void);
-
-    void swap(contiguous_storage &x);
-
-    void default_construct_n(iterator first, size_type n);
-
-    void uninitialized_fill_n(iterator first, size_type n, const value_type &value);
-
-    template<typename InputIterator>
-    iterator uninitialized_copy(InputIterator first, InputIterator last, iterator result);
-
-    template<typename System, typename InputIterator>
-    iterator uninitialized_copy(thrust::execution_policy<System> &from_system,
-                                InputIterator first,
-                                InputIterator last,
-                                iterator result);
-
-    template<typename InputIterator, typename Size>
-    iterator uninitialized_copy_n(InputIterator first, Size n, iterator result);
-
-    template<typename System, typename InputIterator, typename Size>
-    iterator uninitialized_copy_n(thrust::execution_policy<System> &from_system,
-                                  InputIterator first,
-                                  Size n,
-                                  iterator result);
-
-    void destroy(iterator first, iterator last);
-
-  private:
-    // XXX we could inherit from this to take advantage of empty base class optimization
-    allocator_type m_allocator;
-
-    iterator m_begin;
-    
-    size_type m_size;
-
-    // disallow assignment
-    contiguous_storage &operator=(const contiguous_storage &x);
-}; // end contiguous_storage
-
-} // end detail
-
-template<typename T, typename Alloc> void swap(detail::contiguous_storage<T,Alloc> &lhs, detail::contiguous_storage<T,Alloc> &rhs);
-
-} // end thrust
-
-#include <thrust/detail/contiguous_storage.inl>
-
diff --git a/compat/thrust/detail/contiguous_storage.inl b/compat/thrust/detail/contiguous_storage.inl
deleted file mode 100644
index 7e26c26b1a..0000000000
--- a/compat/thrust/detail/contiguous_storage.inl
+++ /dev/null
@@ -1,245 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/contiguous_storage.h>
-#include <thrust/detail/swap.h>
-#include <thrust/detail/allocator/allocator_traits.h>
-#include <thrust/detail/allocator/copy_construct_range.h>
-#include <thrust/detail/allocator/default_construct_range.h>
-#include <thrust/detail/allocator/destroy_range.h>
-#include <thrust/detail/allocator/fill_construct_range.h>
-#include <utility> // for use of std::swap in the WAR below
-
-namespace thrust
-{
-
-namespace detail
-{
-
-template<typename T, typename Alloc>
-  contiguous_storage<T,Alloc>
-    ::contiguous_storage(const Alloc &alloc)
-      :m_allocator(alloc),
-       m_begin(pointer(static_cast<T*>(0))),
-       m_size(0)
-{
-  ;
-} // end contiguous_storage::contiguous_storage()
-
-template<typename T, typename Alloc>
-  contiguous_storage<T,Alloc>
-    ::contiguous_storage(size_type n, const Alloc &alloc)
-      :m_allocator(alloc),
-       m_begin(pointer(static_cast<T*>(0))),
-       m_size(0)
-{
-  allocate(n);
-} // end contiguous_storage::contiguous_storage()
-
-template<typename T, typename Alloc>
-  contiguous_storage<T,Alloc>
-    ::~contiguous_storage(void)
-{
-  deallocate();
-} // end contiguous_storage::~contiguous_storage()
-
-template<typename T, typename Alloc>
-  typename contiguous_storage<T,Alloc>::size_type
-    contiguous_storage<T,Alloc>
-      ::size(void) const
-{
-  return m_size;
-} // end contiguous_storage::size()
-
-template<typename T, typename Alloc>
-  typename contiguous_storage<T,Alloc>::size_type
-    contiguous_storage<T,Alloc>
-      ::max_size(void) const
-{
-  return alloc_traits::max_size(m_allocator);
-} // end contiguous_storage::max_size()
-
-template<typename T, typename Alloc>
-  typename contiguous_storage<T,Alloc>::iterator
-    contiguous_storage<T,Alloc>
-      ::begin(void)
-{
-  return m_begin;
-} // end contiguous_storage::begin()
-
-template<typename T, typename Alloc>
-  typename contiguous_storage<T,Alloc>::const_iterator
-    contiguous_storage<T,Alloc>
-      ::begin(void) const
-{
-  return m_begin;
-} // end contiguous_storage::begin()
-
-template<typename T, typename Alloc>
-  typename contiguous_storage<T,Alloc>::iterator
-    contiguous_storage<T,Alloc>
-      ::end(void)
-{
-  return m_begin + size();
-} // end contiguous_storage::end()
-
-template<typename T, typename Alloc>
-  typename contiguous_storage<T,Alloc>::const_iterator
-    contiguous_storage<T,Alloc>
-      ::end(void) const
-{
-  return m_begin + size();
-} // end contiguous_storage::end()
-
-template<typename T, typename Alloc>
-  typename contiguous_storage<T,Alloc>::reference
-    contiguous_storage<T,Alloc>
-      ::operator[](size_type n)
-{
-  return m_begin[n];
-} // end contiguous_storage::operator[]()
-
-template<typename T, typename Alloc>
-  typename contiguous_storage<T,Alloc>::const_reference
-    contiguous_storage<T,Alloc>
-      ::operator[](size_type n) const
-{
-  return m_begin[n];
-} // end contiguous_storage::operator[]()
-
-template<typename T, typename Alloc>
-  typename contiguous_storage<T,Alloc>::allocator_type
-    contiguous_storage<T,Alloc>
-      ::get_allocator(void) const
-{
-  return m_allocator;
-} // end contiguous_storage::get_allocator()
-
-template<typename T, typename Alloc>
-  void contiguous_storage<T,Alloc>
-    ::allocate(size_type n)
-{
-  if(n > 0)
-  {
-    m_begin = iterator(m_allocator.allocate(n));
-    m_size = n;
-  } // end if
-  else
-  {
-    m_begin = iterator(pointer(static_cast<T*>(0)));
-    m_size = 0;
-  } // end else
-} // end contiguous_storage::allocate()
-
-template<typename T, typename Alloc>
-  void contiguous_storage<T,Alloc>
-    ::deallocate(void)
-{
-  if(size() > 0)
-  {
-    m_allocator.deallocate(m_begin.base(), size());
-    m_begin = iterator(pointer(static_cast<T*>(0)));
-    m_size = 0;
-  } // end if
-} // end contiguous_storage::deallocate()
-
-template<typename T, typename Alloc>
-  void contiguous_storage<T,Alloc>
-    ::swap(contiguous_storage &x)
-{
-  thrust::swap(m_begin, x.m_begin);
-  thrust::swap(m_size, x.m_size);
-
-  // XXX WAR nvcc 4.0's "calling a __host__ function from a __host__ __device__ function is not allowed" warning
-  //thrust::swap(m_allocator, x.m_allocator);
-  std::swap(m_allocator, x.m_allocator);
-} // end contiguous_storage::swap()
-
-template<typename T, typename Alloc>
-  void contiguous_storage<T,Alloc>
-    ::default_construct_n(iterator first, size_type n)
-{
-  default_construct_range(m_allocator, first.base(), n);
-} // end contiguous_storage::default_construct_n()
-
-template<typename T, typename Alloc>
-  void contiguous_storage<T,Alloc>
-    ::uninitialized_fill_n(iterator first, size_type n, const value_type &x)
-{
-  fill_construct_range(m_allocator, first.base(), n, x);
-} // end contiguous_storage::uninitialized_fill()
-
-template<typename T, typename Alloc>
-  template<typename System, typename InputIterator>
-    typename contiguous_storage<T,Alloc>::iterator
-      contiguous_storage<T,Alloc>
-        ::uninitialized_copy(thrust::execution_policy<System> &from_system, InputIterator first, InputIterator last, iterator result)
-{
-  return iterator(copy_construct_range(from_system, m_allocator, first, last, result.base()));
-} // end contiguous_storage::uninitialized_copy()
-
-template<typename T, typename Alloc>
-  template<typename InputIterator>
-    typename contiguous_storage<T,Alloc>::iterator
-      contiguous_storage<T,Alloc>
-        ::uninitialized_copy(InputIterator first, InputIterator last, iterator result)
-{
-  // XXX assumes InputIterator's associated System is default-constructible
-  typename thrust::iterator_system<InputIterator>::type from_system;
-
-  return iterator(copy_construct_range(from_system, m_allocator, first, last, result.base()));
-} // end contiguous_storage::uninitialized_copy()
-
-template<typename T, typename Alloc>
-  template<typename System, typename InputIterator, typename Size>
-    typename contiguous_storage<T,Alloc>::iterator
-      contiguous_storage<T,Alloc>
-        ::uninitialized_copy_n(thrust::execution_policy<System> &from_system, InputIterator first, Size n, iterator result)
-{
-  return iterator(copy_construct_range_n(from_system, m_allocator, first, n, result.base()));
-} // end contiguous_storage::uninitialized_copy_n()
-
-template<typename T, typename Alloc>
-  template<typename InputIterator, typename Size>
-    typename contiguous_storage<T,Alloc>::iterator
-      contiguous_storage<T,Alloc>
-        ::uninitialized_copy_n(InputIterator first, Size n, iterator result)
-{
-  // XXX assumes InputIterator's associated System is default-constructible
-  typename thrust::iterator_system<InputIterator>::type from_system;
-
-  return iterator(copy_construct_range_n(from_system, m_allocator, first, n, result.base()));
-} // end contiguous_storage::uninitialized_copy_n()
-
-template<typename T, typename Alloc>
-  void contiguous_storage<T,Alloc>
-    ::destroy(iterator first, iterator last)
-{
-  destroy_range(m_allocator, first.base(), last - first);
-} // end contiguous_storage::destroy()
-
-} // end detail
-
-template<typename T, typename Alloc>
-  void swap(detail::contiguous_storage<T,Alloc> &lhs, detail::contiguous_storage<T,Alloc> &rhs)
-{
-  lhs.swap(rhs);
-} // end swap()
-
-} // end thrust
-
diff --git a/compat/thrust/detail/copy.h b/compat/thrust/detail/copy.h
deleted file mode 100644
index 8ed3abd219..0000000000
--- a/compat/thrust/detail/copy.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/execution_policy.h>
-
-namespace thrust
-{
-
-template<typename System,
-         typename InputIterator,
-         typename OutputIterator>
-  OutputIterator copy(const thrust::detail::execution_policy_base<System> &system,
-                      InputIterator first,
-                      InputIterator last,
-                      OutputIterator result);
-
-template<typename System,
-         typename InputIterator,
-         typename Size,
-         typename OutputIterator>
-  OutputIterator copy_n(const thrust::detail::execution_policy_base<System> &system,
-                        InputIterator first,
-                        Size n,
-                        OutputIterator result);
-
-template<typename InputIterator,
-         typename OutputIterator>
-  OutputIterator copy(InputIterator first,
-                      InputIterator last,
-                      OutputIterator result);
-
-template<typename InputIterator,
-         typename Size,
-         typename OutputIterator>
-  OutputIterator copy_n(InputIterator first,
-                        Size n,
-                        OutputIterator result);
-
-
-namespace detail
-{
-
-
-template<typename FromSystem,
-         typename ToSystem,
-         typename InputIterator,
-         typename OutputIterator>
-  OutputIterator two_system_copy(thrust::execution_policy<FromSystem> &from_system,
-                                 thrust::execution_policy<ToSystem>   &two_system,
-                                 InputIterator first,
-                                 InputIterator last,
-                                 OutputIterator result);
-
-
-template<typename FromSystem,
-         typename ToSystem,
-         typename InputIterator,
-         typename Size,
-         typename OutputIterator>
-  OutputIterator two_system_copy_n(thrust::execution_policy<FromSystem> &from_system,
-                                   thrust::execution_policy<ToSystem>   &two_system,
-                                   InputIterator first,
-                                   Size n,
-                                   OutputIterator result);
-
-
-} // end detail
-} // end thrust
-
-#include <thrust/detail/copy.inl>
-
diff --git a/compat/thrust/detail/copy.inl b/compat/thrust/detail/copy.inl
deleted file mode 100644
index 9ac48074a0..0000000000
--- a/compat/thrust/detail/copy.inl
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/copy.h>
-#include <thrust/system/detail/generic/select_system.h>
-#include <thrust/system/detail/generic/copy.h>
-#include <thrust/system/detail/adl/copy.h>
-
-namespace thrust
-{
-
-
-template<typename DerivedPolicy, typename InputIterator, typename OutputIterator>
-  OutputIterator copy(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                      InputIterator first,
-                      InputIterator last,
-                      OutputIterator result)
-{
-  using thrust::system::detail::generic::copy;
-  return copy(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result);
-} // end copy()
-
-
-template<typename DerivedPolicy, typename InputIterator, typename Size, typename OutputIterator>
-  OutputIterator copy_n(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                        InputIterator first,
-                        Size n,
-                        OutputIterator result)
-{
-  using thrust::system::detail::generic::copy_n;
-  return copy_n(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, n, result);
-} // end copy_n()
-
-
-namespace detail
-{
-
-
-template<typename System1,
-         typename System2,
-         typename InputIterator,
-         typename OutputIterator>
-  OutputIterator two_system_copy(thrust::execution_policy<System1> &system1,
-                                 thrust::execution_policy<System2> &system2,
-                                 InputIterator first,
-                                 InputIterator last,
-                                 OutputIterator result)
-{
-  using thrust::system::detail::generic::select_system;
-
-  return thrust::copy(select_system(thrust::detail::derived_cast(thrust::detail::strip_const(system1)), thrust::detail::derived_cast(thrust::detail::strip_const(system2))), first, last, result);
-} // end two_system_copy()
-
-
-template<typename System1,
-         typename System2,
-         typename InputIterator,
-         typename Size,
-         typename OutputIterator>
-  OutputIterator two_system_copy_n(thrust::execution_policy<System1> &system1,
-                                   thrust::execution_policy<System2> &system2,
-                                   InputIterator first,
-                                   Size n,
-                                   OutputIterator result)
-{
-  using thrust::system::detail::generic::select_system;
-
-  return thrust::copy_n(select_system(thrust::detail::derived_cast(thrust::detail::strip_const(system1)), thrust::detail::derived_cast(thrust::detail::strip_const(system2))), first, n, result);
-} // end two_system_copy_n()
-
-
-} // end detail
-
-
-template<typename InputIterator,
-         typename OutputIterator>
-  OutputIterator copy(InputIterator first,
-                      InputIterator last,
-                      OutputIterator result)
-{
-  typedef typename thrust::iterator_system<InputIterator>::type  System1;
-  typedef typename thrust::iterator_system<OutputIterator>::type System2;
-
-  System1 system1;
-  System2 system2;
-
-  return thrust::detail::two_system_copy(system1, system2, first, last, result);
-} // end copy()
-
-
-template<typename InputIterator,
-         typename Size,
-         typename OutputIterator>
-  OutputIterator copy_n(InputIterator first,
-                        Size n,
-                        OutputIterator result)
-{
-  typedef typename thrust::iterator_system<InputIterator>::type  System1;
-  typedef typename thrust::iterator_system<OutputIterator>::type System2;
-
-  System1 system1;
-  System2 system2;
-
-  return thrust::detail::two_system_copy_n(system1, system2, first, n, result);
-} // end copy_n()
-
-
-} // end namespace thrust
-
diff --git a/compat/thrust/detail/copy_if.h b/compat/thrust/detail/copy_if.h
deleted file mode 100644
index 54e1ef4027..0000000000
--- a/compat/thrust/detail/copy_if.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/execution_policy.h>
-
-namespace thrust
-{
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename Predicate>
-  OutputIterator copy_if(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                         InputIterator first,
-                         InputIterator last,
-                         OutputIterator result,
-                         Predicate pred);
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename Predicate>
-  OutputIterator copy_if(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                         InputIterator1 first,
-                         InputIterator1 last,
-                         InputIterator2 stencil,
-                         OutputIterator result,
-                         Predicate pred);
-
-template<typename InputIterator,
-         typename OutputIterator,
-         typename Predicate>
-  OutputIterator copy_if(InputIterator first,
-                         InputIterator last,
-                         OutputIterator result,
-                         Predicate pred);
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename Predicate>
-  OutputIterator copy_if(InputIterator1 first,
-                         InputIterator1 last,
-                         InputIterator2 stencil,
-                         OutputIterator result,
-                         Predicate pred);
-
-} // end thrust
-
-#include <thrust/detail/copy_if.inl>
-
diff --git a/compat/thrust/detail/copy_if.inl b/compat/thrust/detail/copy_if.inl
deleted file mode 100644
index e443bb7d0b..0000000000
--- a/compat/thrust/detail/copy_if.inl
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/copy_if.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/system/detail/generic/copy_if.h>
-#include <thrust/system/detail/generic/select_system.h>
-#include <thrust/system/detail/adl/copy_if.h>
-
-namespace thrust
-{
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename Predicate>
-  OutputIterator copy_if(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                         InputIterator first,
-                         InputIterator last,
-                         OutputIterator result,
-                         Predicate pred)
-{
-  using thrust::system::detail::generic::copy_if;
-  return copy_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, pred);
-} // end copy_if()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename Predicate>
-  OutputIterator copy_if(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                         InputIterator1 first,
-                         InputIterator1 last,
-                         InputIterator2 stencil,
-                         OutputIterator result,
-                         Predicate pred)
-{
-  using thrust::system::detail::generic::copy_if;
-  return copy_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, stencil, result, pred);
-} // end copy_if()
-
-
-template<typename InputIterator,
-         typename OutputIterator,
-         typename Predicate>
-  OutputIterator copy_if(InputIterator first,
-                         InputIterator last,
-                         OutputIterator result,
-                         Predicate pred)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator>::type  System1;
-  typedef typename thrust::iterator_system<OutputIterator>::type System2;
-
-  System1 system1;
-  System2 system2;
-
-  return thrust::copy_if(select_system(system1,system2), first, last, result, pred);
-} // end copy_if()
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename Predicate>
-  OutputIterator copy_if(InputIterator1 first,
-                         InputIterator1 last,
-                         InputIterator2 stencil,
-                         OutputIterator result,
-                         Predicate pred)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type System1;
-  typedef typename thrust::iterator_system<InputIterator2>::type System2;
-  typedef typename thrust::iterator_system<OutputIterator>::type System3;
-
-  System1 system1;
-  System2 system2;
-  System3 system3;
-
-  return thrust::copy_if(select_system(system1,system2,system3), first, last, stencil, result, pred);
-} // end copy_if()
-
-
-} // end thrust
-
diff --git a/compat/thrust/detail/count.inl b/compat/thrust/detail/count.inl
deleted file mode 100644
index d2856ae1ce..0000000000
--- a/compat/thrust/detail/count.inl
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file count.inl
- *  \brief Inline file for count.h.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/count.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/system/detail/generic/select_system.h>
-#include <thrust/system/detail/generic/count.h>
-#include <thrust/system/detail/adl/count.h>
-
-namespace thrust
-{
-
-
-template<typename DerivedPolicy, typename InputIterator, typename EqualityComparable>
-  typename thrust::iterator_traits<InputIterator>::difference_type
-    count(const thrust::detail::execution_policy_base<DerivedPolicy> &exec, InputIterator first, InputIterator last, const EqualityComparable& value)
-{
-  using thrust::system::detail::generic::count;
-  return count(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, value);
-} // end count()
-
-
-template<typename DerivedPolicy, typename InputIterator, typename Predicate>
-  typename thrust::iterator_traits<InputIterator>::difference_type
-    count_if(const thrust::detail::execution_policy_base<DerivedPolicy> &exec, InputIterator first, InputIterator last, Predicate pred)
-{
-  using thrust::system::detail::generic::count_if;
-  return count_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, pred);
-} // end count_if()
-
-
-template <typename InputIterator, typename EqualityComparable>
-typename thrust::iterator_traits<InputIterator>::difference_type
-count(InputIterator first, InputIterator last, const EqualityComparable& value)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator>::type System;
-
-  System system;
-
-  return thrust::count(select_system(system), first, last, value);
-} // end count()
-
-
-template <typename InputIterator, typename Predicate>
-typename thrust::iterator_traits<InputIterator>::difference_type
-count_if(InputIterator first, InputIterator last, Predicate pred)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator>::type System;
-
-  System system;
-
-  return thrust::count_if(select_system(system), first, last, pred);
-} // end count_if()
-
-
-} // end namespace thrust
-
diff --git a/compat/thrust/detail/cstdint.h b/compat/thrust/detail/cstdint.h
deleted file mode 100644
index 25d30fd5b2..0000000000
--- a/compat/thrust/detail/cstdint.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#if (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_GCC)
-#include <stdint.h>
-#endif
-
-namespace thrust
-{
-namespace detail
-{
-
-#if (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC)
-
-#if (_MSC_VER < 1300)
-   typedef signed   char     int8_t;
-   typedef signed   short    int16_t;
-   typedef signed   int      int32_t;
-   typedef unsigned char     uint8_t;
-   typedef unsigned short    uint16_t;
-   typedef unsigned int      uint32_t;
-#else
-   typedef signed   __int8   int8_t;
-   typedef signed   __int16  int16_t;
-   typedef signed   __int32  int32_t;
-   typedef unsigned __int8   uint8_t;
-   typedef unsigned __int16  uint16_t;
-   typedef unsigned __int32  uint32_t;
-#endif
-typedef signed   __int64     int64_t;
-typedef unsigned __int64     uint64_t;
-
-#else
-
-typedef ::int8_t   int8_t;
-typedef ::int16_t  int16_t;
-typedef ::int32_t  int32_t;
-typedef ::int64_t  int64_t;
-typedef ::uint8_t  uint8_t;
-typedef ::uint16_t uint16_t;
-typedef ::uint32_t uint32_t;
-typedef ::uint64_t uint64_t;
-
-#endif
-
-
-// an oracle to tell us how to define intptr_t
-template<int word_size = sizeof(void*)> struct divine_intptr_t;
-template<int word_size = sizeof(void*)> struct divine_uintptr_t;
-
-// 32b platforms
-template<>  struct divine_intptr_t<4>  {  typedef thrust::detail::int32_t  type; };
-template<>  struct divine_uintptr_t<4> {  typedef thrust::detail::uint32_t type; };
-
-// 64b platforms
-template<>  struct divine_intptr_t<8>  { typedef thrust::detail::int64_t  type; };
-template<>  struct divine_uintptr_t<8> { typedef thrust::detail::uint64_t type; };
-
-typedef divine_intptr_t<>::type   intptr_t;
-typedef divine_uintptr_t<>::type  uintptr_t;
-
-} // end detail
-} // end thrust
-
diff --git a/compat/thrust/detail/device_delete.inl b/compat/thrust/detail/device_delete.inl
deleted file mode 100644
index dd70d76891..0000000000
--- a/compat/thrust/detail/device_delete.inl
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file device_delete.inl
- *  \brief Inline file for device_delete.h.
- */
-
-#include <thrust/device_delete.h>
-#include <thrust/device_free.h>
-#include <thrust/detail/allocator/destroy_range.h>
-
-namespace thrust
-{
-namespace detail
-{
-
-// define an empty allocator class to use below
-struct device_delete_allocator {};
-
-}
-
-template<typename T>
-  void device_delete(device_ptr<T> ptr,
-                     const size_t n)
-{
-  // we can use device_allocator to destroy the range
-  thrust::detail::device_delete_allocator a;
-  thrust::detail::destroy_range(a, ptr, n);
-  thrust::device_free(ptr);
-} // end device_delete()
-
-} // end thrust
-
diff --git a/compat/thrust/detail/device_free.inl b/compat/thrust/detail/device_free.inl
deleted file mode 100644
index ab8db9f09c..0000000000
--- a/compat/thrust/detail/device_free.inl
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file device_free.inl
- *  \brief Inline file for device_free.h.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/device_free.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/system/detail/generic/select_system.h>
-#include <thrust/detail/malloc_and_free.h>
-
-namespace thrust
-{
-
-void device_free(thrust::device_ptr<void> ptr)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef thrust::iterator_system< thrust::device_ptr<void> >::type system;
-
-  // XXX lower to select_system(system) here
-  system s;
-
-  thrust::free(s, ptr);
-} // end device_free()
-
-} // end thrust
-
diff --git a/compat/thrust/detail/device_malloc.inl b/compat/thrust/detail/device_malloc.inl
deleted file mode 100644
index 76d0029993..0000000000
--- a/compat/thrust/detail/device_malloc.inl
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file device_malloc.inl
- *  \brief Inline file for device_malloc.h.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/device_malloc.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/system/detail/generic/select_system.h>
-#include <thrust/detail/malloc_and_free.h>
-
-namespace thrust
-{
-
-
-thrust::device_ptr<void> device_malloc(const std::size_t n)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef thrust::iterator_system< thrust::device_ptr<void> >::type system;
-
-  // XXX lower to select_system(system) here
-  system s;
-
-  return thrust::device_ptr<void>(thrust::malloc(s, n).get());
-} // end device_malloc()
-
-
-template<typename T>
-  thrust::device_ptr<T> device_malloc(const std::size_t n)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef thrust::iterator_system< thrust::device_ptr<void> >::type system;
-
-  // XXX lower to select_system(system) here
-  system s;
-
-  return thrust::device_ptr<T>(thrust::malloc<T>(s,n).get());
-} // end device_malloc()
-
-
-} // end thrust
-
diff --git a/compat/thrust/detail/device_new.inl b/compat/thrust/detail/device_new.inl
deleted file mode 100644
index 1f00a97a94..0000000000
--- a/compat/thrust/detail/device_new.inl
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file device_new.inl
- *  \brief Inline file for device_new.h.
- */
-
-#include <thrust/device_new.h>
-#include <thrust/device_malloc.h>
-#include <thrust/uninitialized_fill.h>
-
-namespace thrust
-{
-
-template<typename T>
-  device_ptr<T> device_new(device_ptr<void> p,
-                           const size_t n)
-{
-  // XXX TODO dispatch n null device constructors at p here
-  // in the meantime, dispatch 1 null host constructor here
-  // and dispatch n copy constructors
-  return device_new<T>(p, T(), n);
-} // end device_new()
-
-template<typename T>
-  device_ptr<T> device_new(device_ptr<void> p,
-                           const T &exemplar,
-                           const size_t n)
-{
-  device_ptr<T> result(reinterpret_cast<T*>(p.get()));
-
-  // run copy constructors at p here
-  thrust::uninitialized_fill(result, result + n, exemplar);
-  
-  return result;
-} // end device_new()
-
-template<typename T>
-  device_ptr<T> device_new(const size_t n)
-{
-  // call placement new
-  return device_new<T>(thrust::device_malloc<T>(n));
-} // end device_new()
-
-} // thrust
-
diff --git a/compat/thrust/detail/device_ptr.inl b/compat/thrust/detail/device_ptr.inl
deleted file mode 100644
index 0afe8a19c1..0000000000
--- a/compat/thrust/detail/device_ptr.inl
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file device_ptr.inl
- *  \brief Inline file for device_ptr.h.
- */
-
-#include <thrust/device_ptr.h>
-#include <thrust/device_reference.h>
-#include <iostream>
-
-#include <thrust/detail/type_traits.h>
-#include <thrust/iterator/iterator_traits.h>
-
-namespace thrust
-{
-
-template<typename T>
-  device_ptr<T> device_pointer_cast(T *ptr)
-{
-  return device_ptr<T>(ptr);
-} // end device_pointer_cast()
-
-template<typename T>
-  device_ptr<T> device_pointer_cast(const device_ptr<T> &ptr)
-{
-  return ptr;
-} // end device_pointer_cast()
-
-// output to ostream
-template<class E, class T, class Y>
-  std::basic_ostream<E, T> &operator<<(std::basic_ostream<E, T> &os, const device_ptr<Y> &p)
-{
-  return os << p.get();
-} // end operator<<()
-
-
-namespace detail
-{
-
-template<typename T>
-  struct is_device_ptr< thrust::device_ptr<T> >
-    : public true_type
-{
-}; // end is_device_ptr
-
-#if (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC) && (_MSC_VER <= 1400)
-// XXX WAR MSVC 2005 problem with correctly implementing
-//     pointer_raw_pointer for device_ptr by specializing it here
-template<typename T>
-  struct pointer_raw_pointer< thrust::device_ptr<T> >
-{
-  typedef typename device_ptr<T>::raw_pointer type;
-}; // end pointer_raw_pointer
-#endif
-
-
-} // end namespace detail
-} // end namespace thrust
-
diff --git a/compat/thrust/detail/device_reference.inl b/compat/thrust/detail/device_reference.inl
deleted file mode 100644
index ad5cb76688..0000000000
--- a/compat/thrust/detail/device_reference.inl
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file device_reference.inl
- *  \brief Inline file for device_reference.h.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/device_reference.h>
-
-namespace thrust
-{
-
-template<typename T>
-  template<typename OtherT>
-    device_reference<T> &
-      device_reference<T>
-        ::operator=(const device_reference<OtherT> &other)
-{
-  return super_t::operator=(other);
-} // end operator=()
-
-template<typename T>
-  device_reference<T> &
-    device_reference<T>
-      ::operator=(const value_type &x)
-{
-  return super_t::operator=(x);
-} // end operator=()
-
-template<typename T>
-__host__ __device__
-void swap(device_reference<T> &a, device_reference<T> &b)
-{
-  a.swap(b);
-} // end swap()
-
-} // end thrust
-
diff --git a/compat/thrust/detail/device_vector.inl b/compat/thrust/detail/device_vector.inl
deleted file mode 100644
index f6bafbaa5f..0000000000
--- a/compat/thrust/detail/device_vector.inl
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file device_vector.inl
- *  \brief Inline file for device_vector.h.
- */
-
-#include <thrust/host_vector.h>
-
-namespace thrust
-{
-
-template<typename T, typename Alloc>
-  template<typename OtherT, typename OtherAlloc>
-    device_vector<T,Alloc>
-      ::device_vector(const host_vector<OtherT,OtherAlloc> &v)
-        :Parent(v)
-{
-  ;
-} // end device_vector::device_vector()
-
-} // end namespace thrust
-
diff --git a/compat/thrust/detail/dispatch/is_trivial_copy.h b/compat/thrust/detail/dispatch/is_trivial_copy.h
deleted file mode 100644
index 2bedf1f716..0000000000
--- a/compat/thrust/detail/dispatch/is_trivial_copy.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file trivial_copy.h
- *  \brief Device implementations for copying memory between host and device.
- */
-
-#pragma once
-
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/iterator/detail/is_trivial_iterator.h>
-
-namespace thrust
-{
-
-namespace detail
-{
-
-namespace dispatch
-{
-
-
-// a trivial copy's iterator's value_types match,
-// the iterators themselves are normal_iterators
-// and the ToIterator's value_type has_trivial_assign
-template<typename FromIterator, typename ToIterator>
-  struct is_trivial_copy :
-    integral_constant<
-      bool,
-      is_same<
-        typename thrust::iterator_value<FromIterator>::type,
-        typename thrust::iterator_value<ToIterator>::type
-      >::value
-      && is_trivial_iterator<FromIterator>::value
-      && is_trivial_iterator<ToIterator>::value
-      && has_trivial_assign<typename thrust::iterator_value<ToIterator>::type>::value
-    > {};
-
-} // end namespace dispatch
-
-} // end namespace detail
-
-} // end namespace thrust
-
diff --git a/compat/thrust/detail/distance.inl b/compat/thrust/detail/distance.inl
deleted file mode 100644
index f37595f324..0000000000
--- a/compat/thrust/detail/distance.inl
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file distance.inl
- *  \brief Inline file for distance.h
- */
-
-#include <thrust/advance.h>
-#include <thrust/system/detail/generic/distance.h>
-#include <thrust/iterator/iterator_traits.h>
-
-namespace thrust
-{
-
-
-template<typename InputIterator>
-  inline typename thrust::iterator_traits<InputIterator>::difference_type
-    distance(InputIterator first, InputIterator last)
-{
-  return thrust::system::detail::generic::distance(first, last);
-} // end distance()
-
-
-} // end namespace thrust
-
diff --git a/compat/thrust/detail/equal.inl b/compat/thrust/detail/equal.inl
deleted file mode 100644
index ca6fecccf1..0000000000
--- a/compat/thrust/detail/equal.inl
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file equal.inl
- *  \brief Inline file for equal.h.
- */
-
-#include <thrust/equal.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/system/detail/generic/select_system.h>
-#include <thrust/system/detail/generic/equal.h>
-#include <thrust/system/detail/adl/equal.h>
-
-namespace thrust
-{
-
-
-template<typename System, typename InputIterator1, typename InputIterator2>
-bool equal(const thrust::detail::execution_policy_base<System> &system, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2)
-{
-  using thrust::system::detail::generic::equal;
-  return equal(thrust::detail::derived_cast(thrust::detail::strip_const(system)), first1, last1, first2);
-} // end equal()
-
-
-template<typename System, typename InputIterator1, typename InputIterator2, typename BinaryPredicate>
-bool equal(const thrust::detail::execution_policy_base<System> &system, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, BinaryPredicate binary_pred)
-{
-  using thrust::system::detail::generic::equal;
-  return equal(thrust::detail::derived_cast(thrust::detail::strip_const(system)), first1, last1, first2, binary_pred);
-} // end equal()
-
-
-template <typename InputIterator1, typename InputIterator2>
-bool equal(InputIterator1 first1, InputIterator1 last1,
-           InputIterator2 first2)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type System1;
-  typedef typename thrust::iterator_system<InputIterator2>::type System2;
-
-  System1 system1;
-  System2 system2;
-
-  return thrust::equal(select_system(system1,system2), first1, last1, first2);
-}
-
-
-template <typename InputIterator1, typename InputIterator2, 
-          typename BinaryPredicate>
-bool equal(InputIterator1 first1, InputIterator1 last1,
-           InputIterator2 first2, BinaryPredicate binary_pred)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type System1;
-  typedef typename thrust::iterator_system<InputIterator2>::type System2;
-
-  System1 system1;
-  System2 system2;
-
-  return thrust::equal(select_system(system1,system2), first1, last1, first2, binary_pred);
-}
-
-
-} // end namespace thrust
-
diff --git a/compat/thrust/detail/execute_with_allocator.h b/compat/thrust/detail/execute_with_allocator.h
deleted file mode 100644
index 9d3c1ba29b..0000000000
--- a/compat/thrust/detail/execute_with_allocator.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/raw_pointer_cast.h>
-#include <thrust/detail/type_traits/pointer_traits.h>
-#include <thrust/detail/allocator/allocator_traits.h>
-#include <thrust/detail/util/blocking.h>
-#include <thrust/pair.h>
-
-namespace thrust
-{
-namespace detail
-{
-
-template<typename ToPointer, typename FromPointer>
-__host__ __device__
-ToPointer reinterpret_pointer_cast(FromPointer ptr)
-{
-  typedef typename thrust::detail::pointer_element<ToPointer>::type to_element;
-  return ToPointer(reinterpret_cast<to_element*>(thrust::raw_pointer_cast(ptr)));
-}
-
-
-template<typename Allocator, template <typename> class BaseSystem>
-  struct execute_with_allocator
-    : BaseSystem<execute_with_allocator<Allocator, BaseSystem> >
-{
-  Allocator &m_alloc;
-
-  execute_with_allocator(Allocator &alloc)
-    : m_alloc(alloc)
-  {}
-
-  template<typename T>
-    friend thrust::pair<T*,std::ptrdiff_t>
-      get_temporary_buffer(execute_with_allocator &system, std::ptrdiff_t n)
-  {
-    typedef typename thrust::detail::allocator_traits<Allocator> alloc_traits;
-    typedef typename alloc_traits::void_pointer                  void_pointer;
-    typedef typename alloc_traits::size_type                     size_type;
-    typedef typename alloc_traits::value_type                    value_type;
-
-    // how many elements of type value_type do we need to accomodate n elements of type T?
-    size_type num_elements = thrust::detail::util::divide_ri(sizeof(T) * n, sizeof(value_type));
-
-    // allocate that many
-    void_pointer ptr = alloc_traits::allocate(system.m_alloc, num_elements);
-
-    // return the pointer and the number of elements of type T allocated
-    return thrust::make_pair(thrust::detail::reinterpret_pointer_cast<T*>(ptr),n);
-  }
-
-  template<typename Pointer>
-    friend void return_temporary_buffer(execute_with_allocator &system, Pointer p)
-  {
-    typedef typename thrust::detail::allocator_traits<Allocator> alloc_traits;
-    typedef typename alloc_traits::pointer                       pointer;
-
-    // return the pointer to the allocator
-    pointer to_ptr = thrust::detail::reinterpret_pointer_cast<pointer>(p);
-    alloc_traits::deallocate(system.m_alloc, to_ptr, 0);
-  }
-};
-
-
-} // end detail
-} // end thrust
-
diff --git a/compat/thrust/detail/execution_policy.h b/compat/thrust/detail/execution_policy.h
deleted file mode 100644
index 28e77f2e91..0000000000
--- a/compat/thrust/detail/execution_policy.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-namespace thrust
-{
-namespace detail
-{
-
-
-// execution_policy_base serves as a guard against
-// inifinite recursion in thrust entry points:
-//
-// template<typename DerivedPolicy>
-// void foo(const thrust::detail::execution_policy_base<DerivedPolicy> &s)
-// {
-//   using thrust::system::detail::generic::foo;
-//
-//   foo(thrust::detail::derived_cast(thrust::detail::strip_const(s));
-// }
-//
-// foo is not recursive when
-// 1. DerivedPolicy is derived from thrust::execution_policy below
-// 2. generic::foo takes thrust::execution_policy as a parameter
-template<typename DerivedPolicy> struct execution_policy_base {};
-
-
-template<typename DerivedPolicy>
-__host__ __device__
-inline execution_policy_base<DerivedPolicy> &strip_const(const execution_policy_base<DerivedPolicy> &x)
-{
-  return const_cast<execution_policy_base<DerivedPolicy>&>(x);
-}
-
-
-template<typename DerivedPolicy>
-__host__ __device__
-inline DerivedPolicy &derived_cast(execution_policy_base<DerivedPolicy> &x)
-{
-  return static_cast<DerivedPolicy&>(x);
-}
-
-
-template<typename DerivedPolicy>
-__host__ __device__
-inline const DerivedPolicy &derived_cast(const execution_policy_base<DerivedPolicy> &x)
-{
-  return static_cast<const DerivedPolicy&>(x);
-}
-
-
-} // end detail
-
-
-template<typename DerivedPolicy>
-  struct execution_policy
-    : thrust::detail::execution_policy_base<DerivedPolicy>
-{};
-
-
-} // end thrust
-
diff --git a/compat/thrust/detail/extrema.inl b/compat/thrust/detail/extrema.inl
deleted file mode 100644
index 4bcd0bde56..0000000000
--- a/compat/thrust/detail/extrema.inl
+++ /dev/null
@@ -1,160 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#include <thrust/detail/config.h>
-#include <thrust/extrema.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/system/detail/generic/select_system.h>
-#include <thrust/system/detail/generic/extrema.h>
-#include <thrust/system/detail/adl/extrema.h>
-
-namespace thrust
-{
-
-
-template<typename DerivedPolicy, typename ForwardIterator>
-ForwardIterator min_element(const thrust::detail::execution_policy_base<DerivedPolicy> &exec, ForwardIterator first, ForwardIterator last)
-{
-  using thrust::system::detail::generic::min_element;
-  return min_element(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last);
-} // end min_element()
-
-
-template<typename DerivedPolicy, typename ForwardIterator, typename BinaryPredicate>
-ForwardIterator min_element(const thrust::detail::execution_policy_base<DerivedPolicy> &exec, ForwardIterator first, ForwardIterator last, BinaryPredicate comp)
-{
-  using thrust::system::detail::generic::min_element;
-  return min_element(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, comp);
-} // end min_element()
-
-
-template<typename DerivedPolicy, typename ForwardIterator>
-ForwardIterator max_element(const thrust::detail::execution_policy_base<DerivedPolicy> &exec, ForwardIterator first, ForwardIterator last)
-{
-  using thrust::system::detail::generic::max_element;
-  return max_element(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last);
-} // end max_element()
-
-
-template<typename DerivedPolicy, typename ForwardIterator, typename BinaryPredicate>
-ForwardIterator max_element(const thrust::detail::execution_policy_base<DerivedPolicy> &exec, ForwardIterator first, ForwardIterator last, BinaryPredicate comp)
-{
-  using thrust::system::detail::generic::max_element;
-  return max_element(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, comp);
-} // end max_element()
-
-
-template<typename DerivedPolicy, typename ForwardIterator>
-thrust::pair<ForwardIterator,ForwardIterator> minmax_element(const thrust::detail::execution_policy_base<DerivedPolicy> &exec, ForwardIterator first, ForwardIterator last)
-{
-  using thrust::system::detail::generic::minmax_element;
-  return minmax_element(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last);
-} // end minmax_element()
-
-
-template<typename DerivedPolicy, typename ForwardIterator, typename BinaryPredicate>
-thrust::pair<ForwardIterator,ForwardIterator> minmax_element(const thrust::detail::execution_policy_base<DerivedPolicy> &exec, ForwardIterator first, ForwardIterator last, BinaryPredicate comp)
-{
-  using thrust::system::detail::generic::minmax_element;
-  return minmax_element(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, comp);
-} // end minmax_element()
-
-
-template <typename ForwardIterator>
-ForwardIterator min_element(ForwardIterator first, ForwardIterator last)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<ForwardIterator>::type System;
-
-  System system;
-
-  return thrust::min_element(select_system(system), first, last);
-} // end min_element()
-
-
-template <typename ForwardIterator, typename BinaryPredicate>
-ForwardIterator min_element(ForwardIterator first, ForwardIterator last,
-                            BinaryPredicate comp)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<ForwardIterator>::type System;
-
-  System system;
-
-  return thrust::min_element(select_system(system), first, last, comp);
-} // end min_element()
-
-
-template <typename ForwardIterator>
-ForwardIterator max_element(ForwardIterator first, ForwardIterator last)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<ForwardIterator>::type System;
-
-  System system;
-
-  return thrust::max_element(select_system(system), first, last);
-} // end max_element()
-
-
-template <typename ForwardIterator, typename BinaryPredicate>
-ForwardIterator max_element(ForwardIterator first, ForwardIterator last,
-                            BinaryPredicate comp)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<ForwardIterator>::type System;
-
-  System system;
-
-  return thrust::max_element(select_system(system), first, last, comp);
-} // end max_element()
-
-
-template <typename ForwardIterator>
-thrust::pair<ForwardIterator,ForwardIterator> 
-minmax_element(ForwardIterator first, ForwardIterator last)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<ForwardIterator>::type System;
-
-  System system;
-
-  return thrust::minmax_element(select_system(system), first, last);
-} // end minmax_element()
-
-
-template <typename ForwardIterator, typename BinaryPredicate>
-thrust::pair<ForwardIterator,ForwardIterator> 
-minmax_element(ForwardIterator first, ForwardIterator last, BinaryPredicate comp)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<ForwardIterator>::type System;
-
-  System system;
-
-  return thrust::minmax_element(select_system(system), first, last, comp);
-} // end minmax_element()
-
-
-} // end namespace thrust
-
diff --git a/compat/thrust/detail/fill.inl b/compat/thrust/detail/fill.inl
deleted file mode 100644
index c60e4a059e..0000000000
--- a/compat/thrust/detail/fill.inl
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file fill.inl
- *  \brief Inline file for fill.h.
- */
-
-#include <thrust/fill.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/system/detail/generic/select_system.h>
-#include <thrust/system/detail/generic/fill.h>
-#include <thrust/system/detail/adl/fill.h>
-
-namespace thrust
-{
-
-
-template<typename DerivedPolicy, typename ForwardIterator, typename T>
-  void fill(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-            ForwardIterator first,
-            ForwardIterator last,
-            const T &value)
-{
-  using thrust::system::detail::generic::fill;
-  return fill(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, value);
-} // end fill()
-
-
-template<typename DerivedPolicy, typename OutputIterator, typename Size, typename T>
-  OutputIterator fill_n(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                        OutputIterator first,
-                        Size n,
-                        const T &value)
-{
-  using thrust::system::detail::generic::fill_n;
-  return fill_n(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, n, value);
-} // end fill_n()
-
-
-template<typename ForwardIterator, typename T>
-  void fill(ForwardIterator first,
-            ForwardIterator last,
-            const T &value)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<ForwardIterator>::type System;
-
-  System system;
-
-  thrust::fill(select_system(system), first, last, value);
-} // end fill()
-
-
-template<typename OutputIterator, typename Size, typename T>
-  OutputIterator fill_n(OutputIterator first,
-                        Size n,
-                        const T &value)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<OutputIterator>::type System;
-
-  System system;
-
-  return thrust::fill_n(select_system(system), first, n, value);
-} // end fill()
-
-
-} // end namespace thrust
-
diff --git a/compat/thrust/detail/find.inl b/compat/thrust/detail/find.inl
deleted file mode 100644
index 465c937395..0000000000
--- a/compat/thrust/detail/find.inl
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file find.inl
- *  \brief Inline file for find.h
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/system/detail/generic/select_system.h>
-#include <thrust/system/detail/generic/find.h>
-#include <thrust/system/detail/adl/find.h>
-
-namespace thrust
-{
-
-
-template<typename DerivedPolicy, typename InputIterator, typename T>
-InputIterator find(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                   InputIterator first,
-                   InputIterator last,
-                   const T& value)
-{
-  using thrust::system::detail::generic::find;
-  return find(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, value);
-} // end find()
-
-
-template<typename DerivedPolicy, typename InputIterator, typename Predicate>
-InputIterator find_if(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                      InputIterator first,
-                      InputIterator last,
-                      Predicate pred)
-{
-  using thrust::system::detail::generic::find_if;
-  return find_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, pred);
-} // end find_if()
-
-
-template<typename DerivedPolicy, typename InputIterator, typename Predicate>
-InputIterator find_if_not(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                          InputIterator first,
-                          InputIterator last,
-                          Predicate pred)
-{
-  using thrust::system::detail::generic::find_if_not;
-  return find_if_not(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, pred);
-} // end find_if_not()
-
-
-template <typename InputIterator, typename T>
-InputIterator find(InputIterator first,
-                   InputIterator last,
-                   const T& value)
-{
-    using thrust::system::detail::generic::select_system;
-
-    typedef typename thrust::iterator_system<InputIterator>::type System;
-
-    System system;
-
-    return thrust::find(select_system(system), first, last, value);
-}
-
-template <typename InputIterator, typename Predicate>
-InputIterator find_if(InputIterator first,
-                      InputIterator last,
-                      Predicate pred)
-{
-    using thrust::system::detail::generic::select_system;
-
-    typedef typename thrust::iterator_system<InputIterator>::type System;
-
-    System system;
-
-    return thrust::find_if(select_system(system), first, last, pred);
-}
-
-template <typename InputIterator, typename Predicate>
-InputIterator find_if_not(InputIterator first,
-                          InputIterator last,
-                          Predicate pred)
-{
-    using thrust::system::detail::generic::select_system;
-
-    typedef typename thrust::iterator_system<InputIterator>::type System;
-
-    System system;
-
-    return thrust::find_if_not(select_system(system), first, last, pred);
-}
-
-
-} // end namespace thrust
-
diff --git a/compat/thrust/detail/for_each.inl b/compat/thrust/detail/for_each.inl
deleted file mode 100644
index 7c9dc172e3..0000000000
--- a/compat/thrust/detail/for_each.inl
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file for_each.inl
- *  \brief Inline file for for_each.h.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/for_each.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/system/detail/generic/select_system.h>
-#include <thrust/system/detail/generic/for_each.h>
-#include <thrust/system/detail/adl/for_each.h>
-
-namespace thrust
-{
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename UnaryFunction>
-  InputIterator for_each(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                         InputIterator first,
-                         InputIterator last,
-                         UnaryFunction f)
-{
-  using thrust::system::detail::generic::for_each;
-
-  return for_each(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, f);
-}
-
-
-template<typename InputIterator,
-         typename UnaryFunction>
-InputIterator for_each(InputIterator first,
-                       InputIterator last,
-                       UnaryFunction f)
-{
-  using thrust::system::detail::generic::select_system;
-  typedef typename thrust::iterator_system<InputIterator>::type System;
-
-  System system;
-  return thrust::for_each(select_system(system), first, last, f);
-} // end for_each()
-
-
-template<typename DerivedPolicy, typename InputIterator, typename Size, typename UnaryFunction>
-  InputIterator for_each_n(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                           InputIterator first,
-                           Size n,
-                           UnaryFunction f)
-{
-  using thrust::system::detail::generic::for_each_n;
-
-  return for_each_n(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, n, f);
-} // end for_each_n()
-
-
-template<typename InputIterator,
-         typename Size,
-         typename UnaryFunction>
-InputIterator for_each_n(InputIterator first,
-                         Size n,
-                         UnaryFunction f)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator>::type System;
-
-  System system;
-  return thrust::for_each_n(select_system(system), first, n, f);
-} // end for_each_n()
-
-
-} // end namespace thrust
-
diff --git a/compat/thrust/detail/function.h b/compat/thrust/detail/function.h
deleted file mode 100644
index 36b76c286f..0000000000
--- a/compat/thrust/detail/function.h
+++ /dev/null
@@ -1,226 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/raw_reference_cast.h>
-
-namespace thrust
-{
-namespace detail
-{
-
-
-template<typename Function, typename Result>
-  struct host_function
-{
-  // mutable because Function::operator() might be const
-  mutable Function m_f;
-
-  inline host_function()
-    : m_f()
-  {}
-
-  inline host_function(const Function &f)
-    : m_f(f)
-  {}
-
-  template<typename Argument>
-    inline Result operator()(Argument &x) const
-  {
-    // we static cast to Result to handle void Result without error
-    // in case Function's result is non-void
-    return static_cast<Result>(m_f(thrust::raw_reference_cast(x)));
-  }
-
-  template<typename Argument>
-    inline Result operator()(const Argument &x) const
-  {
-    // we static cast to Result to handle void Result without error
-    // in case Function's result is non-void
-    return static_cast<Result>(m_f(thrust::raw_reference_cast(x)));
-  }
-
-  template<typename Argument1, typename Argument2>
-    inline Result operator()(Argument1 &x, Argument2 &y) const
-  {
-    // we static cast to Result to handle void Result without error
-    // in case Function's result is non-void
-    return static_cast<Result>(m_f(thrust::raw_reference_cast(x), thrust::raw_reference_cast(y)));
-  }
-
-  template<typename Argument1, typename Argument2>
-    inline Result operator()(const Argument1 &x, Argument2 &y) const
-  {
-    // we static cast to Result to handle void Result without error
-    // in case Function's result is non-void
-    return static_cast<Result>(m_f(thrust::raw_reference_cast(x), thrust::raw_reference_cast(y)));
-  }
-
-  template<typename Argument1, typename Argument2>
-    inline Result operator()(const Argument1 &x, const Argument2 &y) const
-  {
-    // we static cast to Result to handle void Result without error
-    // in case Function's result is non-void
-    return static_cast<Result>(m_f(thrust::raw_reference_cast(x), thrust::raw_reference_cast(y)));
-  }
-
-  template<typename Argument1, typename Argument2>
-    inline Result operator()(Argument1 &x, const Argument2 &y) const
-  {
-    // we static cast to Result to handle void Result without error
-    // in case Function's result is non-void
-    return static_cast<Result>(m_f(thrust::raw_reference_cast(x), thrust::raw_reference_cast(y)));
-  }
-}; // end host_function
-
-
-template<typename Function, typename Result>
-  struct device_function
-{
-  // mutable because Function::operator() might be const
-  mutable Function m_f;
-
-  inline __device__ device_function()
-    : m_f()
-  {}
-
-  inline __device__ device_function(const Function &f)
-    : m_f(f)
-  {}
-
-  template<typename Argument>
-    inline __device__ Result operator()(Argument &x) const
-  {
-    // we static cast to Result to handle void Result without error
-    // in case Function's result is non-void
-    return static_cast<Result>(m_f(thrust::raw_reference_cast(x)));
-  }
-
-  template<typename Argument>
-    inline __device__ Result operator()(const Argument &x) const
-  {
-    // we static cast to Result to handle void Result without error
-    // in case Function's result is non-void
-    return static_cast<Result>(m_f(thrust::raw_reference_cast(x)));
-  }
-
-  template<typename Argument1, typename Argument2>
-    inline __device__ Result operator()(Argument1 &x, Argument2 &y) const
-  {
-    // we static cast to Result to handle void Result without error
-    // in case Function's result is non-void
-    return static_cast<Result>(m_f(thrust::raw_reference_cast(x), thrust::raw_reference_cast(y)));
-  }
-
-  template<typename Argument1, typename Argument2>
-    inline __device__ Result operator()(const Argument1 &x, Argument2 &y) const
-  {
-    // we static cast to Result to handle void Result without error
-    // in case Function's result is non-void
-    return static_cast<Result>(m_f(thrust::raw_reference_cast(x), thrust::raw_reference_cast(y)));
-  }
-
-  template<typename Argument1, typename Argument2>
-    inline __device__ Result operator()(const Argument1 &x, const Argument2 &y) const
-  {
-    // we static cast to Result to handle void Result without error
-    // in case Function's result is non-void
-    return static_cast<Result>(m_f(thrust::raw_reference_cast(x), thrust::raw_reference_cast(y)));
-  }
-
-  template<typename Argument1, typename Argument2>
-    inline __device__ Result operator()(Argument1 &x, const Argument2 &y) const
-  {
-    // we static cast to Result to handle void Result without error
-    // in case Function's result is non-void
-    return static_cast<Result>(m_f(thrust::raw_reference_cast(x), thrust::raw_reference_cast(y)));
-  }
-}; // end device_function
-
-
-template<typename Function, typename Result>
-  struct host_device_function
-{
-  // mutable because Function::operator() might be const
-  mutable Function m_f;
-
-  inline __host__ __device__
-  host_device_function()
-    : m_f()
-  {}
-
-  inline __host__ __device__
-  host_device_function(const Function &f)
-    : m_f(f)
-  {}
-
-  __thrust_hd_warning_disable__
-  template<typename Argument>
-  inline __host__ __device__
-    Result operator()(Argument &x) const
-  {
-    // we static cast to Result to handle void Result without error
-    // in case Function's result is non-void
-    return static_cast<Result>(m_f(thrust::raw_reference_cast(x)));
-  }
-
-  template<typename Argument>
-    inline __host__ __device__ Result operator()(const Argument &x) const
-  {
-    // we static cast to Result to handle void Result without error
-    // in case Function's result is non-void
-    return static_cast<Result>(m_f(thrust::raw_reference_cast(x)));
-  }
-
-  template<typename Argument1, typename Argument2>
-    inline __host__ __device__ Result operator()(Argument1 &x, Argument2 &y) const
-  {
-    // we static cast to Result to handle void Result without error
-    // in case Function's result is non-void
-    return static_cast<Result>(m_f(thrust::raw_reference_cast(x), thrust::raw_reference_cast(y)));
-  }
-
-  template<typename Argument1, typename Argument2>
-    inline __host__ __device__ Result operator()(const Argument1 &x, Argument2 &y) const
-  {
-    // we static cast to Result to handle void Result without error
-    // in case Function's result is non-void
-    return static_cast<Result>(m_f(thrust::raw_reference_cast(x), thrust::raw_reference_cast(y)));
-  }
-
-  template<typename Argument1, typename Argument2>
-    inline __host__ __device__ Result operator()(const Argument1 &x, const Argument2 &y) const
-  {
-    // we static cast to Result to handle void Result without error
-    // in case Function's result is non-void
-    return static_cast<Result>(m_f(thrust::raw_reference_cast(x), thrust::raw_reference_cast(y)));
-  }
-
-  template<typename Argument1, typename Argument2>
-    inline __host__ __device__ Result operator()(Argument1 &x, const Argument2 &y) const
-  {
-    // we static cast to Result to handle void Result without error
-    // in case Function's result is non-void
-    return static_cast<Result>(m_f(thrust::raw_reference_cast(x), thrust::raw_reference_cast(y)));
-  }
-}; // end host_device_function
-
-
-} // end detail
-} // end thrust
-
diff --git a/compat/thrust/detail/functional.inl b/compat/thrust/detail/functional.inl
deleted file mode 100644
index 4024585935..0000000000
--- a/compat/thrust/detail/functional.inl
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/functional.h>
-
-namespace thrust
-{
-
-namespace detail
-{
-
-template<typename Operation>
-  struct unary_traits_imp;
-
-template<typename Operation>
-  struct unary_traits_imp<Operation*>
-{
-  typedef Operation                         function_type;
-  typedef const function_type &             param_type;
-  typedef typename Operation::result_type   result_type;
-  typedef typename Operation::argument_type argument_type;
-}; // end unary_traits_imp
-
-template<typename Result, typename Argument>
-  struct unary_traits_imp<Result(*)(Argument)>
-{
-  typedef Result   (*function_type)(Argument);
-  typedef Result   (*param_type)(Argument);
-  typedef Result   result_type;
-  typedef Argument argument_type;
-}; // end unary_traits_imp
-
-template<typename Operation>
-  struct binary_traits_imp;
-
-template<typename Operation>
-  struct binary_traits_imp<Operation*>
-{
-  typedef Operation                                function_type;
-  typedef const function_type &                    param_type;
-  typedef typename Operation::result_type          result_type;
-  typedef typename Operation::first_argument_type  first_argument_type;
-  typedef typename Operation::second_argument_type second_argument_type;
-}; // end binary_traits_imp
-
-template<typename Result, typename Argument1, typename Argument2>
-  struct binary_traits_imp<Result(*)(Argument1, Argument2)>
-{
-  typedef Result (*function_type)(Argument1, Argument2);
-  typedef Result (*param_type)(Argument1, Argument2);
-  typedef Result result_type;
-  typedef Argument1 first_argument_type;
-  typedef Argument2 second_argument_type;
-}; // end binary_traits_imp
-
-} // end detail
-
-template<typename Operation>
-  struct unary_traits
-{
-  typedef typename detail::unary_traits_imp<Operation*>::function_type function_type;
-  typedef typename detail::unary_traits_imp<Operation*>::param_type    param_type;
-  typedef typename detail::unary_traits_imp<Operation*>::result_type   result_type;
-  typedef typename detail::unary_traits_imp<Operation*>::argument_type argument_type;
-}; // end unary_traits
-
-template<typename Result, typename Argument>
-  struct unary_traits<Result(*)(Argument)>
-{
-  typedef Result   (*function_type)(Argument);
-  typedef Result   (*param_type)(Argument);
-  typedef Result   result_type;
-  typedef Argument argument_type;
-}; // end unary_traits
-
-template<typename Operation>
-  struct binary_traits
-{
-  typedef typename detail::binary_traits_imp<Operation*>::function_type        function_type;
-  typedef typename detail::binary_traits_imp<Operation*>::param_type           param_type;
-  typedef typename detail::binary_traits_imp<Operation*>::result_type          result_type;
-  typedef typename detail::binary_traits_imp<Operation*>::first_argument_type  first_argument_type;
-  typedef typename detail::binary_traits_imp<Operation*>::second_argument_type second_argument_type;
-}; // end binary_traits
-
-template<typename Result, typename Argument1, typename Argument2>
-  struct binary_traits<Result(*)(Argument1, Argument2)>
-{
-  typedef Result (*function_type)(Argument1, Argument2);
-  typedef Result (*param_type)(Argument1, Argument2);
-  typedef Result result_type;
-  typedef Argument1 first_argument_type;
-  typedef Argument2 second_argument_type;
-}; // end binary_traits
-
-template<typename Predicate>
-  unary_negate<Predicate> not1(const Predicate &pred)
-{
-  return unary_negate<Predicate>(pred);
-} // end not1()
-
-template<typename BinaryPredicate>
-  binary_negate<BinaryPredicate> not2(const BinaryPredicate &pred)
-{
-  return binary_negate<BinaryPredicate>(pred);
-} // end not2()
-
-} // end thrust
-
diff --git a/compat/thrust/detail/functional/actor.h b/compat/thrust/detail/functional/actor.h
deleted file mode 100644
index 0b95a6b894..0000000000
--- a/compat/thrust/detail/functional/actor.h
+++ /dev/null
@@ -1,192 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-// Portions of this code are derived from
-//
-// Manjunath Kudlur's Carbon library
-//
-// and
-//
-// Based on Boost.Phoenix v1.2
-// Copyright (c) 2001-2002 Joel de Guzman
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/tuple.h>
-#include <thrust/detail/functional/value.h>
-#include <thrust/detail/functional/composite.h>
-#include <thrust/detail/functional/operators/assignment_operator.h>
-#include <thrust/detail/type_traits/result_of.h>
-
-namespace thrust
-{
-namespace detail
-{
-namespace functional
-{
-
-template<typename Action, typename Env>
-  struct apply_actor
-{
-  typedef typename Action::template result<Env>::type type;
-};
-
-template<typename Eval>
-  struct actor
-    : Eval
-{
-  typedef Eval eval_type;
-
-  __host__ __device__
-  actor(void);
-
-  __host__ __device__
-  actor(const Eval &base);
-
-  __host__ __device__
-  typename apply_actor<eval_type, thrust::null_type >::type
-  operator()(void) const;
-
-  template<typename T0>
-  __host__ __device__
-  typename apply_actor<eval_type, thrust::tuple<T0&> >::type
-  operator()(T0 &_0) const;
-
-  template<typename T0, typename T1>
-  __host__ __device__
-  typename apply_actor<eval_type, thrust::tuple<T0&,T1&> >::type
-  operator()(T0 &_0, T1 &_1) const;
-
-  template<typename T0, typename T1, typename T2>
-  __host__ __device__
-  typename apply_actor<eval_type, thrust::tuple<T0&,T1&,T2&> >::type
-  operator()(T0 &_0, T1 &_1, T2 &_2) const;
-
-  template<typename T0, typename T1, typename T2, typename T3>
-  __host__ __device__
-  typename apply_actor<eval_type, thrust::tuple<T0&,T1&,T2&,T3&> >::type
-  operator()(T0 &_0, T1 &_1, T2 &_2, T3 &_3) const;
-
-  template<typename T0, typename T1, typename T2, typename T3, typename T4>
-  __host__ __device__
-  typename apply_actor<eval_type, thrust::tuple<T0&,T1&,T2&,T3&,T4&> >::type
-  operator()(T0 &_0, T1 &_1, T2 &_2, T3 &_3, T4 &_4) const;
-
-  template<typename T0, typename T1, typename T2, typename T3, typename T4, typename T5>
-  __host__ __device__
-  typename apply_actor<eval_type, thrust::tuple<T0&,T1&,T2&,T3&,T4&,T5&> >::type
-  operator()(T0 &_0, T1 &_1, T2 &_2, T3 &_3, T4 &_4, T5 &_5) const;
-
-  template<typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6>
-  __host__ __device__
-  typename apply_actor<eval_type, thrust::tuple<T0&,T1&,T2&,T3&,T4&,T5&,T6&> >::type
-  operator()(T0 &_0, T1 &_1, T2 &_2, T3 &_3, T4 &_4, T5 &_5, T6 &_6) const;
-
-  template<typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7>
-  __host__ __device__
-  typename apply_actor<eval_type, thrust::tuple<T0&,T1&,T2&,T3&,T4&,T5&,T6&,T7&> >::type
-  operator()(T0 &_0, T1 &_1, T2 &_2, T3 &_3, T4 &_4, T5 &_5, T6 &_6, T7 &_7) const;
-
-  template<typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8>
-  __host__ __device__
-  typename apply_actor<eval_type, thrust::tuple<T0&,T1&,T2&,T3&,T4&,T5&,T6&,T7&,T8&> >::type
-  operator()(T0 &_0, T1 &_1, T2 &_2, T3 &_3, T4 &_4, T5 &_5, T6 &_6, T7 &_7, T8 &_8) const;
-
-  template<typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8, typename T9>
-  __host__ __device__
-  typename apply_actor<eval_type, thrust::tuple<T0&,T1&,T2&,T3&,T4&,T5&,T6&,T7&,T8&,T9&> >::type
-  operator()(T0 &_0, T1 &_1, T2 &_2, T3 &_3, T4 &_4, T5 &_5, T6 &_6, T7 &_7, T8 &_8, T9 &_9) const;
-
-  template<typename T>
-  __host__ __device__
-  typename assign_result<Eval,T>::type
-  operator=(const T &_1) const;
-}; // end actor
-
-// in general, as_actor should turn things into values
-template<typename T>
-  struct as_actor
-{
-  typedef value<T> type;
-
-  static inline __host__ __device__ type convert(const T &x)
-  {
-    return val(x);
-  } // end convert()
-}; // end as_actor
-
-// specialization for things which are already actors
-template<typename Eval>
-  struct as_actor<actor<Eval> >
-{
-  typedef actor<Eval> type;
-
-  static inline __host__ __device__ const type &convert(const actor<Eval> &x)
-  {
-    return x;
-  } // end convert()
-}; // end as_actor
-
-template<typename T>
-  typename as_actor<T>::type
-  __host__ __device__
-    make_actor(const T &x)
-{
-  return as_actor<T>::convert(x);
-} // end make_actor()
-
-} // end functional
-
-// provide specializations for result_of for nullary, unary, and binary invocations of actor
-template<typename Eval>
-  struct result_of<
-    thrust::detail::functional::actor<Eval>()
-  >
-{
-  typedef typename thrust::detail::functional::apply_actor<
-    thrust::detail::functional::actor<Eval>,
-    thrust::null_type
-  >::type type;
-}; // end result_of
-
-template<typename Eval, typename Arg1>
-  struct result_of<
-    thrust::detail::functional::actor<Eval>(Arg1)
-  >
-{
-  typedef typename thrust::detail::functional::apply_actor<
-    thrust::detail::functional::actor<Eval>,
-    thrust::tuple<Arg1>
-  >::type type;
-}; // end result_of
-
-template<typename Eval, typename Arg1, typename Arg2>
-  struct result_of<
-    thrust::detail::functional::actor<Eval>(Arg1,Arg2)
-  >
-{
-  typedef typename thrust::detail::functional::apply_actor<
-    thrust::detail::functional::actor<Eval>,
-    thrust::tuple<Arg1,Arg2>
-  >::type type;
-}; // end result_of
-
-} // end detail
-} // end thrust
-
-#include <thrust/detail/functional/actor.inl>
-
diff --git a/compat/thrust/detail/functional/actor.inl b/compat/thrust/detail/functional/actor.inl
deleted file mode 100644
index 84347be7b8..0000000000
--- a/compat/thrust/detail/functional/actor.inl
+++ /dev/null
@@ -1,194 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-// Portions of this code are derived from
-//
-// Manjunath Kudlur's Carbon library
-//
-// and
-//
-// Based on Boost.Phoenix v1.2
-// Copyright (c) 2001-2002 Joel de Guzman
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/functional/composite.h>
-#include <thrust/detail/functional/operators/assignment_operator.h>
-#include <thrust/functional.h>
-
-namespace thrust
-{
-
-namespace detail
-{
-namespace functional
-{
-
-template<typename Eval>
-  actor<Eval>
-    ::actor(void)
-      : eval_type()
-{}
-
-template<typename Eval>
-  actor<Eval>
-    ::actor(const Eval &base)
-      : eval_type(base)
-{}
-
-template<typename Eval>
-  typename apply_actor<
-    typename actor<Eval>::eval_type,
-    typename thrust::null_type
-  >::type
-    actor<Eval>
-      ::operator()(void) const
-{
-  return eval_type::eval(thrust::null_type());
-} // end basic_environment::operator()
-
-template<typename Eval>
-  template<typename T0>
-    typename apply_actor<
-      typename actor<Eval>::eval_type,
-      typename thrust::tuple<T0&>
-    >::type
-      actor<Eval>
-        ::operator()(T0 &_0) const
-{
-  return eval_type::eval(thrust::tie(_0));
-} // end basic_environment::operator()
-
-template<typename Eval>
-  template<typename T0, typename T1>
-    typename apply_actor<
-      typename actor<Eval>::eval_type,
-      typename thrust::tuple<T0&,T1&>
-    >::type
-      actor<Eval>
-        ::operator()(T0 &_0, T1 &_1) const
-{
-  return eval_type::eval(thrust::tie(_0,_1));
-} // end basic_environment::operator()
-
-template<typename Eval>
-  template<typename T0, typename T1, typename T2>
-    typename apply_actor<
-      typename actor<Eval>::eval_type,
-      typename thrust::tuple<T0&,T1&,T2&>
-    >::type
-      actor<Eval>
-        ::operator()(T0 &_0, T1 &_1, T2 &_2) const
-{
-  return eval_type::eval(thrust::tie(_0,_1,_2));
-} // end basic_environment::operator()
-
-template<typename Eval>
-  template<typename T0, typename T1, typename T2, typename T3>
-    typename apply_actor<
-      typename actor<Eval>::eval_type,
-      typename thrust::tuple<T0&,T1&,T2&,T3&>
-    >::type
-      actor<Eval>
-        ::operator()(T0 &_0, T1 &_1, T2 &_2, T3 &_3) const
-{
-  return eval_type::eval(thrust::tie(_0,_1,_2,_3));
-} // end basic_environment::operator()
-
-template<typename Eval>
-  template<typename T0, typename T1, typename T2, typename T3, typename T4>
-    typename apply_actor<
-      typename actor<Eval>::eval_type,
-      typename thrust::tuple<T0&,T1&,T2&,T3&,T4&>
-    >::type
-      actor<Eval>
-        ::operator()(T0 &_0, T1 &_1, T2 &_2, T3 &_3, T4 &_4) const
-{
-  return eval_type::eval(thrust::tie(_0,_1,_2,_3,_4));
-} // end basic_environment::operator()
-
-template<typename Eval>
-  template<typename T0, typename T1, typename T2, typename T3, typename T4, typename T5>
-    typename apply_actor<
-      typename actor<Eval>::eval_type,
-      typename thrust::tuple<T0&,T1&,T2&,T3&,T4&,T5&>
-    >::type
-      actor<Eval>
-        ::operator()(T0 &_0, T1 &_1, T2 &_2, T3 &_3, T4 &_4, T5 &_5) const
-{
-  return eval_type::eval(thrust::tie(_0,_1,_2,_3,_4,_5));
-} // end basic_environment::operator()
-
-template<typename Eval>
-  template<typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6>
-    typename apply_actor<
-      typename actor<Eval>::eval_type,
-      typename thrust::tuple<T0&,T1&,T2&,T3&,T4&,T5&,T6&>
-    >::type
-      actor<Eval>
-        ::operator()(T0 &_0, T1 &_1, T2 &_2, T3 &_3, T4 &_4, T5 &_5, T6 &_6) const
-{
-  return eval_type::eval(thrust::tie(_0,_1,_2,_3,_4,_5,_6));
-} // end basic_environment::operator()
-
-template<typename Eval>
-  template<typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7>
-    typename apply_actor<
-      typename actor<Eval>::eval_type,
-      typename thrust::tuple<T0&,T1&,T2&,T3&,T4&,T5&,T6&,T7&>
-    >::type
-      actor<Eval>
-        ::operator()(T0 &_0, T1 &_1, T2 &_2, T3 &_3, T4 &_4, T5 &_5, T6 &_6, T7 &_7) const
-{
-  return eval_type::eval(thrust::tie(_0,_1,_2,_3,_4,_5,_6,_7));
-} // end basic_environment::operator()
-
-template<typename Eval>
-  template<typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8>
-    typename apply_actor<
-      typename actor<Eval>::eval_type,
-      typename thrust::tuple<T0&,T1&,T2&,T3&,T4&,T5&,T6&,T7&,T8&>
-    >::type
-      actor<Eval>
-        ::operator()(T0 &_0, T1 &_1, T2 &_2, T3 &_3, T4 &_4, T5 &_5, T6 &_6, T7 &_7, T8 &_8) const
-{
-  return eval_type::eval(thrust::tie(_0,_1,_2,_3,_4,_5,_6,_7,_8));
-} // end basic_environment::operator()
-
-template<typename Eval>
-  template<typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8, typename T9>
-    typename apply_actor<
-      typename actor<Eval>::eval_type,
-      typename thrust::tuple<T0&,T1&,T2&,T3&,T4&,T5&,T6&,T7&,T8&,T9&>
-    >::type
-      actor<Eval>
-        ::operator()(T0 &_0, T1 &_1, T2 &_2, T3 &_3, T4 &_4, T5 &_5, T6 &_6, T7 &_7, T8 &_8, T9 &_9) const
-{
-  return eval_type::eval(thrust::tie(_0,_1,_2,_3,_4,_5,_6,_7,_8,_9));
-} // end basic_environment::operator()
-
-template<typename Eval>
-  template<typename T>
-    typename assign_result<Eval,T>::type
-      actor<Eval>
-        ::operator=(const T& _1) const
-{
-  return do_assign(*this,_1);
-} // end actor::operator=()
-
-} // end functional
-} // end detail
-} // end thrust
-
diff --git a/compat/thrust/detail/functional/argument.h b/compat/thrust/detail/functional/argument.h
deleted file mode 100644
index 96a20bed1f..0000000000
--- a/compat/thrust/detail/functional/argument.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-// Portions of this code are derived from
-//
-// Manjunath Kudlur's Carbon library
-//
-// and
-//
-// Based on Boost.Phoenix v1.2
-// Copyright (c) 2001-2002 Joel de Guzman
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/tuple.h>
-
-namespace thrust
-{
-namespace detail
-{
-namespace functional
-{
-
-template<unsigned int i, typename Env>
-  struct argument_helper
-{
-  typedef typename thrust::tuple_element<i,Env>::type type;
-};
-
-template<unsigned int i>
-  struct argument_helper<i,thrust::null_type>
-{
-  typedef thrust::null_type type;
-};
-
-
-template<unsigned int i>
-  class argument
-{
-  public:
-    template<typename Env>
-      struct result
-        : argument_helper<i,Env>
-    {
-    };
-
-    __host__ __device__
-    argument(void){}
-
-    template<typename Env>
-    __host__ __device__
-    typename result<Env>::type eval(const Env &e) const
-    {
-      return thrust::get<i>(e);
-    } // end eval()
-}; // end argument
-
-} // end functional
-} // end detail
-} // end thrust
-
diff --git a/compat/thrust/detail/functional/composite.h b/compat/thrust/detail/functional/composite.h
deleted file mode 100644
index 1d5fde3152..0000000000
--- a/compat/thrust/detail/functional/composite.h
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-// Portions of this code are derived from
-//
-// Manjunath Kudlur's Carbon library
-//
-// and
-//
-// Based on Boost.Phoenix v1.2
-// Copyright (c) 2001-2002 Joel de Guzman
-
-#pragma once
-
-#include <thrust/detail/functional/actor.h>
-#include <thrust/tuple.h>
-
-namespace thrust
-{
-namespace detail
-{
-namespace functional
-{
-
-// XXX we should just take a single EvalTuple
-template<typename Eval0,
-         typename Eval1  = thrust::null_type,
-         typename Eval2  = thrust::null_type,
-         typename Eval3  = thrust::null_type,
-         typename Eval4  = thrust::null_type,
-         typename Eval5  = thrust::null_type,
-         typename Eval6  = thrust::null_type,
-         typename Eval7  = thrust::null_type,
-         typename Eval8  = thrust::null_type,
-         typename Eval9  = thrust::null_type,
-         typename Eval10 = thrust::null_type>
-  class composite;
-
-template<typename Eval0, typename Eval1>
-  class composite<
-    Eval0,
-    Eval1,
-    thrust::null_type,
-    thrust::null_type,
-    thrust::null_type,
-    thrust::null_type,
-    thrust::null_type,
-    thrust::null_type,
-    thrust::null_type,
-    thrust::null_type
-  >
-{
-  public:
-    template<typename Env>
-      struct result
-    {
-      typedef typename Eval0::template result<
-        thrust::tuple<
-          typename Eval1::template result<Env>::type
-        >
-      >::type type;
-    };
-
-    __host__ __device__
-    composite(const Eval0 &e0, const Eval1 &e1)
-      : m_eval0(e0),
-        m_eval1(e1)
-    {}
-
-    template<typename Env>
-    __host__ __device__
-    typename result<Env>::type
-    eval(const Env &x) const
-    {
-      typename Eval1::template result<Env>::type result1 = m_eval1.eval(x);
-      return m_eval0.eval(thrust::tie(result1));
-    }
-
-  private:
-    Eval0 m_eval0;
-    Eval1 m_eval1;
-}; // end composite<Eval0,Eval1>
-
-template<typename Eval0, typename Eval1, typename Eval2>
-  class composite<
-    Eval0,
-    Eval1,
-    Eval2,
-    thrust::null_type,
-    thrust::null_type,
-    thrust::null_type,
-    thrust::null_type,
-    thrust::null_type,
-    thrust::null_type,
-    thrust::null_type
-  >
-{
-  public:
-    template<typename Env>
-      struct result
-    {
-      typedef typename Eval0::template result<
-        thrust::tuple<
-          typename Eval1::template result<Env>::type,
-          typename Eval2::template result<Env>::type
-        >
-      >::type type;
-    };
-
-    __host__ __device__
-    composite(const Eval0 &e0, const Eval1 &e1, const Eval2 &e2)
-      : m_eval0(e0),
-        m_eval1(e1),
-        m_eval2(e2)
-    {}
-
-    template<typename Env>
-    __host__ __device__
-    typename result<Env>::type
-    eval(const Env &x) const
-    {
-      typename Eval1::template result<Env>::type result1 = m_eval1.eval(x);
-      typename Eval2::template result<Env>::type result2 = m_eval2.eval(x);
-      return m_eval0.eval(thrust::tie(result1,result2));
-    }
-
-  private:
-    Eval0 m_eval0;
-    Eval1 m_eval1;
-    Eval2 m_eval2;
-}; // end composite<Eval0,Eval1,Eval2>
-
-template<typename Eval0, typename Eval1>
-__host__ __device__
-  actor<composite<Eval0,Eval1> > compose(const Eval0 &e0, const Eval1 &e1)
-{
-  return actor<composite<Eval0,Eval1> >(composite<Eval0,Eval1>(e0,e1));
-}
-
-template<typename Eval0, typename Eval1, typename Eval2>
-__host__ __device__
-  actor<composite<Eval0,Eval1,Eval2> > compose(const Eval0 &e0, const Eval1 &e1, const Eval2 &e2)
-{
-  return actor<composite<Eval0,Eval1,Eval2> >(composite<Eval0,Eval1,Eval2>(e0,e1,e2));
-}
-
-} // end functional
-} // end detail
-} // end thrust
-
diff --git a/compat/thrust/detail/functional/operators.h b/compat/thrust/detail/functional/operators.h
deleted file mode 100644
index 0fc3539cb9..0000000000
--- a/compat/thrust/detail/functional/operators.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/functional/operators/arithmetic_operators.h>
-#include <thrust/detail/functional/operators/relational_operators.h>
-#include <thrust/detail/functional/operators/logical_operators.h>
-#include <thrust/detail/functional/operators/bitwise_operators.h>
-#include <thrust/detail/functional/operators/compound_assignment_operators.h>
-
diff --git a/compat/thrust/detail/functional/operators/arithmetic_operators.h b/compat/thrust/detail/functional/operators/arithmetic_operators.h
deleted file mode 100644
index a11e7acdd1..0000000000
--- a/compat/thrust/detail/functional/operators/arithmetic_operators.h
+++ /dev/null
@@ -1,394 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/functional/actor.h>
-#include <thrust/detail/functional/composite.h>
-#include <thrust/detail/functional/operators/operator_adaptors.h>
-#include <thrust/functional.h>
-
-namespace thrust
-{
-namespace detail
-{
-namespace functional
-{
-
-template<typename Eval>
-__host__ __device__
-actor<
-  composite<
-    unary_operator<thrust::negate>,
-    actor<Eval>
-  >
->
-__host__ __device__
-operator-(const actor<Eval> &_1)
-{
-  return compose(unary_operator<thrust::negate>(), _1);
-} // end operator-()
-
-// there's no standard unary_plus functional, so roll an ad hoc one here
-template<typename T>
-  struct unary_plus
-    : public thrust::unary_function<T,T>
-{
-  __host__ __device__ T operator()(const T &x) const {return +x;}
-}; // end unary_plus
-
-template<typename Eval>
-__host__ __device__
-actor<
-  composite<
-    unary_operator<unary_plus>,
-    actor<Eval>
-  >
->
-operator+(const actor<Eval> &_1)
-{
-  return compose(unary_operator<unary_plus>(), _1);
-} // end operator+()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<thrust::plus>,
-    actor<T1>,
-    typename as_actor<T2>::type
-  >
->
-operator+(const actor<T1> &_1, const T2 &_2)
-{
-  return compose(binary_operator<thrust::plus>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator+()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<thrust::plus>,
-    typename as_actor<T1>::type,
-    actor<T2>
-  >
->
-operator+(const T1 &_1, const actor<T2> &_2)
-{
-  return compose(binary_operator<thrust::plus>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator+()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<thrust::plus>,
-    actor<T1>,
-    actor<T2>
-  >
->
-operator+(const actor<T1> &_1, const actor<T2> &_2)
-{
-  return compose(binary_operator<thrust::plus>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator+()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<thrust::minus>,
-    typename as_actor<T1>::type,
-    actor<T2>
-  >
->
-operator-(const T1 &_1, const actor<T2> &_2)
-{
-  return compose(binary_operator<thrust::minus>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator-()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<thrust::minus>,
-    actor<T1>,
-    typename as_actor<T2>::type
-  >
->
-operator-(const actor<T1> &_1, const T2 &_2)
-{
-  return compose(binary_operator<thrust::minus>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator-()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<thrust::minus>,
-    actor<T1>,
-    actor<T2>
-  >
->
-operator-(const actor<T1> &_1, const actor<T2> &_2)
-{
-  return compose(binary_operator<thrust::minus>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator-()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<thrust::multiplies>,
-    typename as_actor<T1>::type,
-    actor<T2>
-  >
->
-operator*(const T1 &_1, const actor<T2> &_2)
-{
-  return compose(binary_operator<thrust::multiplies>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator*()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<thrust::multiplies>,
-    actor<T1>,
-    typename as_actor<T2>::type
-  >
->
-operator*(const actor<T1> &_1, const T2 &_2)
-{
-  return compose(binary_operator<thrust::multiplies>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator*()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<thrust::multiplies>,
-    actor<T1>,
-    actor<T2>
-  >
->
-operator*(const actor<T1> &_1, const actor<T2> &_2)
-{
-  return compose(binary_operator<thrust::multiplies>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator*()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<thrust::divides>,
-    actor<T1>,
-    typename as_actor<T2>::type
-  >
->
-operator/(const actor<T1> &_1, const T2 &_2)
-{
-  return compose(binary_operator<thrust::divides>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator/()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<thrust::divides>,
-    typename as_actor<T1>::type,
-    actor<T2>
-  >
->
-operator/(const T1 &_1, const actor<T2> &_2)
-{
-  return compose(binary_operator<thrust::divides>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator/()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<thrust::divides>,
-    actor<T1>,
-    actor<T2>
-  >
->
-operator/(const actor<T1> &_1, const actor<T2> &_2)
-{
-  return compose(binary_operator<thrust::divides>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator/()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<thrust::modulus>,
-    actor<T1>,
-    typename as_actor<T2>::type
-  >
->
-operator%(const actor<T1> &_1, const T2 &_2)
-{
-  return compose(binary_operator<thrust::modulus>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator%()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<thrust::modulus>,
-    typename as_actor<T1>::type,
-    actor<T2>
-  >
->
-operator%(const T1 &_1, const actor<T2> &_2)
-{
-  return compose(binary_operator<thrust::modulus>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator%()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<thrust::modulus>,
-    actor<T1>,
-    actor<T2>
-  >
->
-operator%(const actor<T1> &_1, const actor<T2> &_2)
-{
-  return compose(binary_operator<thrust::modulus>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator%()
-
-// there's no standard prefix_increment functional, so roll an ad hoc one here
-template<typename T>
-  struct prefix_increment
-    : public thrust::unary_function<T&,T&>
-{
-  __host__ __device__ T& operator()(T &x) const { return ++x; }
-}; // end prefix_increment
-
-template<typename Eval>
-__host__ __device__
-actor<
-  composite<
-    unary_operator<prefix_increment>,
-    actor<Eval>
-  >
->
-operator++(const actor<Eval> &_1)
-{
-  return compose(unary_operator<prefix_increment>(), _1);
-} // end operator++()
-
-// there's no standard suffix_increment functional, so roll an ad hoc one here
-template<typename T>
-  struct suffix_increment
-    : public thrust::unary_function<T&,T>
-{
-  __host__ __device__ T operator()(T &x) const { return x++; }
-}; // end suffix_increment
-
-template<typename Eval>
-__host__ __device__
-actor<
-  composite<
-    unary_operator<suffix_increment>,
-    actor<Eval>
-  >
->
-operator++(const actor<Eval> &_1, int)
-{
-  return compose(unary_operator<suffix_increment>(), _1);
-} // end operator++()
-
-// there's no standard prefix_decrement functional, so roll an ad hoc one here
-template<typename T>
-  struct prefix_decrement
-    : public thrust::unary_function<T&,T&>
-{
-  __host__ __device__ T& operator()(T &x) const { return --x; }
-}; // end prefix_decrement
-
-template<typename Eval>
-__host__ __device__
-actor<
-  composite<
-    unary_operator<prefix_decrement>,
-    actor<Eval>
-  >
->
-operator--(const actor<Eval> &_1)
-{
-  return compose(unary_operator<prefix_decrement>(), _1);
-} // end operator--()
-
-// there's no standard suffix_decrement functional, so roll an ad hoc one here
-template<typename T>
-  struct suffix_decrement
-    : public thrust::unary_function<T&,T>
-{
-  __host__ __device__ T operator()(T &x) const { return x--; }
-}; // end suffix_decrement
-
-template<typename Eval>
-__host__ __device__
-actor<
-  composite<
-    unary_operator<suffix_decrement>,
-    actor<Eval>
-  >
->
-operator--(const actor<Eval> &_1, int)
-{
-  return compose(unary_operator<suffix_decrement>(), _1);
-} // end operator--()
-
-} // end functional
-} // end detail
-} // end thrust
-
diff --git a/compat/thrust/detail/functional/operators/assignment_operator.h b/compat/thrust/detail/functional/operators/assignment_operator.h
deleted file mode 100644
index e5d66202bf..0000000000
--- a/compat/thrust/detail/functional/operators/assignment_operator.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/functional/actor.h>
-#include <thrust/detail/functional/composite.h>
-#include <thrust/detail/functional/operators/operator_adaptors.h>
-#include <thrust/functional.h>
-
-namespace thrust
-{
-
-// XXX WAR circular inclusion with this forward declaration
-template<typename,typename,typename> struct binary_function;
-
-namespace detail
-{
-namespace functional
-{
-
-// XXX WAR circular inclusion with this forward declaration
-template<typename> struct as_actor;
-
-// there's no standard assign functional, so roll an ad hoc one here
-template<typename T>
-  struct assign
-    : thrust::binary_function<T&,T,T&>
-{
-  __host__ __device__ T& operator()(T &lhs, const T &rhs) const { return lhs = rhs; }
-}; // end assign
-
-template<typename Eval, typename T>
-  struct assign_result
-{
-  typedef actor<
-    composite<
-      binary_operator<assign>,
-      actor<Eval>,
-      typename as_actor<T>::type
-    >
-  > type;
-}; // end assign_result
-
-template<typename Eval, typename T>
-  __host__ __device__
-    typename assign_result<Eval,T>::type
-      do_assign(const actor<Eval> &_1, const T &_2)
-{
-  return compose(binary_operator<assign>(),
-                 _1,
-                 as_actor<T>::convert(_2));
-} // end do_assign()
-
-} // end functional
-} // end detail
-} // end thrust
-
diff --git a/compat/thrust/detail/functional/operators/bitwise_operators.h b/compat/thrust/detail/functional/operators/bitwise_operators.h
deleted file mode 100644
index c89c5d4f83..0000000000
--- a/compat/thrust/detail/functional/operators/bitwise_operators.h
+++ /dev/null
@@ -1,313 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/functional/actor.h>
-#include <thrust/detail/functional/composite.h>
-#include <thrust/detail/functional/operators/operator_adaptors.h>
-#include <thrust/functional.h>
-
-namespace thrust
-{
-namespace detail
-{
-namespace functional
-{
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<thrust::bit_and>,
-    actor<T1>,
-    typename as_actor<T2>::type
-  >
->
-operator&(const actor<T1> &_1, const T2 &_2)
-{
-  return compose(binary_operator<thrust::bit_and>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator&()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<thrust::bit_and>,
-    typename as_actor<T1>::type,
-    actor<T2>
-  >
->
-operator&(const T1 &_1, const actor<T2> &_2)
-{
-  return compose(binary_operator<thrust::bit_and>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator&()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<thrust::bit_and>,
-    actor<T1>,
-    actor<T2>
-  >
->
-operator&(const actor<T1> &_1, const actor<T2> &_2)
-{
-  return compose(binary_operator<thrust::bit_and>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator&()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<thrust::bit_or>,
-    actor<T1>,
-    typename as_actor<T2>::type
-  >
->
-operator|(const actor<T1> &_1, const T2 &_2)
-{
-  return compose(binary_operator<thrust::bit_or>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator|()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<thrust::bit_or>,
-    typename as_actor<T1>::type,
-    actor<T2>
-  >
->
-operator|(const T1 &_1, const actor<T2> &_2)
-{
-  return compose(binary_operator<thrust::bit_or>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator|()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<thrust::bit_or>,
-    actor<T1>,
-    actor<T2>
-  >
->
-operator|(const actor<T1> &_1, const actor<T2> &_2)
-{
-  return compose(binary_operator<thrust::bit_or>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator|()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<thrust::bit_xor>,
-    actor<T1>,
-    typename as_actor<T2>::type
-  >
->
-operator^(const actor<T1> &_1, const T2 &_2)
-{
-  return compose(binary_operator<thrust::bit_xor>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator^()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<thrust::bit_xor>,
-    typename as_actor<T1>::type,
-    actor<T2>
-  >
->
-operator^(const T1 &_1, const actor<T2> &_2)
-{
-  return compose(binary_operator<thrust::bit_xor>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator^()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<thrust::bit_xor>,
-    actor<T1>,
-    actor<T2>
-  >
->
-operator^(const actor<T1> &_1, const actor<T2> &_2)
-{
-  return compose(binary_operator<thrust::bit_xor>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator^()
-
-// there's no standard bit_not functional, so roll an ad hoc one here
-template<typename T>
-  struct bit_not
-    : public thrust::unary_function<T,T>
-{
-  __host__ __device__ T operator()(const T &x) const {return ~x;}
-}; // end bit_not
-
-template<typename Eval>
-__host__ __device__
-actor<
-  composite<
-    unary_operator<bit_not>,
-    actor<Eval>
-  >
->
-__host__ __device__
-operator~(const actor<Eval> &_1)
-{
-  return compose(unary_operator<bit_not>(), _1);
-} // end operator~()
-
-// there's no standard bit_lshift functional, so roll an ad hoc one here
-template<typename T>
-  struct bit_lshift
-    : public thrust::binary_function<T,T,T>
-{
-  __host__ __device__ T operator()(const T &lhs, const T &rhs) const {return lhs << rhs;}
-}; // end bit_lshift
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<bit_lshift>,
-    actor<T1>,
-    typename as_actor<T2>::type
-  >
->
-operator<<(const actor<T1> &_1, const T2 &_2)
-{
-  return compose(binary_operator<bit_lshift>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator<<()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<bit_lshift>,
-    typename as_actor<T1>::type,
-    actor<T2>
-  >
->
-operator<<(const T1 &_1, const actor<T2> &_2)
-{
-  return compose(binary_operator<bit_lshift>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator<<()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<bit_lshift>,
-    actor<T1>,
-    actor<T2>
-  >
->
-operator<<(const actor<T1> &_1, const actor<T2> &_2)
-{
-  return compose(binary_operator<bit_lshift>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator<<()
-
-// there's no standard bit_rshift functional, so roll an ad hoc one here
-template<typename T>
-  struct bit_rshift
-    : public thrust::binary_function<T,T,T>
-{
-  __host__ __device__ T operator()(const T &lhs, const T &rhs) const {return lhs >> rhs;}
-}; // end bit_rshift
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<bit_rshift>,
-    actor<T1>,
-    typename as_actor<T2>::type
-  >
->
-operator>>(const actor<T1> &_1, const T2 &_2)
-{
-  return compose(binary_operator<bit_rshift>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator>>()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<bit_rshift>,
-    typename as_actor<T1>::type,
-    actor<T2>
-  >
->
-operator>>(const T1 &_1, const actor<T2> &_2)
-{
-  return compose(binary_operator<bit_rshift>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator>>()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<bit_rshift>,
-    actor<T1>,
-    actor<T2>
-  >
->
-operator>>(const actor<T1> &_1, const actor<T2> &_2)
-{
-  return compose(binary_operator<bit_rshift>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator>>()
-
-} // end functional
-} // end detail
-} // end thrust
-
diff --git a/compat/thrust/detail/functional/operators/compound_assignment_operators.h b/compat/thrust/detail/functional/operators/compound_assignment_operators.h
deleted file mode 100644
index ef7389b55c..0000000000
--- a/compat/thrust/detail/functional/operators/compound_assignment_operators.h
+++ /dev/null
@@ -1,424 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/functional/actor.h>
-#include <thrust/detail/functional/composite.h>
-#include <thrust/detail/functional/operators/operator_adaptors.h>
-
-namespace thrust
-{
-namespace detail
-{
-namespace functional
-{
-
-template<typename T>
-  struct plus_equal
-    : public thrust::binary_function<T&,T,T&>
-{
-  __host__ __device__ T& operator()(T &lhs, const T &rhs) const { return lhs += rhs; }
-}; // end plus_equal
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<plus_equal>,
-    actor<T1>,
-    typename as_actor<T2>::type
-  >
->
-operator+=(const actor<T1> &_1, const T2 &_2)
-{
-  return compose(binary_operator<plus_equal>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator+=()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<plus_equal>,
-    actor<T1>,
-    actor<T2>
-  >
->
-operator+=(const actor<T1> &_1, const actor<T2> &_2)
-{
-  return compose(binary_operator<plus_equal>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator+=()
-
-template<typename T>
-  struct minus_equal
-    : public thrust::binary_function<T&,T,T&>
-{
-  __host__ __device__ T& operator()(T &lhs, const T &rhs) const { return lhs -= rhs; }
-}; // end minus_equal
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<minus_equal>,
-    actor<T1>,
-    typename as_actor<T2>::type
-  >
->
-operator-=(const actor<T1> &_1, const T2 &_2)
-{
-  return compose(binary_operator<minus_equal>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator-=()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<minus_equal>,
-    actor<T1>,
-    actor<T2>
-  >
->
-operator-=(const actor<T1> &_1, const actor<T2> &_2)
-{
-  return compose(binary_operator<minus_equal>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator-=()
-
-template<typename T>
-  struct multiplies_equal
-    : public thrust::binary_function<T&,T,T&>
-{
-  __host__ __device__ T& operator()(T &lhs, const T&rhs) const { return lhs *= rhs; }
-}; // end multiplies_equal
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<multiplies_equal>,
-    actor<T1>,
-    typename as_actor<T2>::type
-  >
->
-operator*=(const actor<T1> &_1, const T2 &_2)
-{
-  return compose(binary_operator<multiplies_equal>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator*=()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<multiplies_equal>,
-    actor<T1>,
-    actor<T2>
-  >
->
-operator*=(const actor<T1> &_1, const actor<T2> &_2)
-{
-  return compose(binary_operator<multiplies_equal>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator*=()
-
-template<typename T>
-  struct divides_equal
-    : public thrust::binary_function<T&,T,T&>
-{
-  __host__ __device__ T& operator()(T &lhs, const T&rhs) const { return lhs /= rhs; }
-}; // end divides_equal
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<divides_equal>,
-    actor<T1>,
-    typename as_actor<T2>::type
-  >
->
-operator/=(const actor<T1> &_1, const T2 &_2)
-{
-  return compose(binary_operator<divides_equal>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator/=()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<divides_equal>,
-    actor<T1>,
-    actor<T2>
-  >
->
-operator/=(const actor<T1> &_1, const actor<T2> &_2)
-{
-  return compose(binary_operator<divides_equal>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator/=()
-
-template<typename T>
-  struct modulus_equal
-    : public thrust::binary_function<T&,T,T&>
-{
-  __host__ __device__ T& operator()(T &lhs, const T&rhs) const { return lhs %= rhs; }
-}; // end modulus_equal
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<modulus_equal>,
-    actor<T1>,
-    typename as_actor<T2>::type
-  >
->
-operator%=(const actor<T1> &_1, const T2 &_2)
-{
-  return compose(binary_operator<modulus_equal>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator%=()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<modulus_equal>,
-    actor<T1>,
-    actor<T2>
-  >
->
-operator%=(const actor<T1> &_1, const actor<T2> &_2)
-{
-  return compose(binary_operator<modulus_equal>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator%=()
-
-template<typename T>
-  struct bit_and_equal
-    : public thrust::binary_function<T&,T,T&>
-{
-  __host__ __device__ T& operator()(T &lhs, const T&rhs) const { return lhs &= rhs; }
-}; // end bit_and_equal
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<bit_and_equal>,
-    actor<T1>,
-    typename as_actor<T2>::type
-  >
->
-operator&=(const actor<T1> &_1, const T2 &_2)
-{
-  return compose(binary_operator<bit_and_equal>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator&=()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<bit_and_equal>,
-    actor<T1>,
-    actor<T2>
-  >
->
-operator&=(const actor<T1> &_1, const actor<T2> &_2)
-{
-  return compose(binary_operator<bit_and_equal>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator&=()
-
-template<typename T>
-  struct bit_or_equal
-    : public thrust::binary_function<T&,T,T&>
-{
-  __host__ __device__ T& operator()(T &lhs, const T&rhs) const { return lhs |= rhs; }
-}; // end bit_or_equal
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<bit_or_equal>,
-    actor<T1>,
-    typename as_actor<T2>::type
-  >
->
-operator|=(const actor<T1> &_1, const T2 &_2)
-{
-  return compose(binary_operator<bit_or_equal>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator|=()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<bit_or_equal>,
-    actor<T1>,
-    actor<T2>
-  >
->
-operator|=(const actor<T1> &_1, const actor<T2> &_2)
-{
-  return compose(binary_operator<bit_or_equal>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator|=()
-
-template<typename T>
-  struct bit_xor_equal
-    : public thrust::binary_function<T&,T,T&>
-{
-  __host__ __device__ T& operator()(T &lhs, const T&rhs) const { return lhs ^= rhs; }
-}; // end bit_xor_equal
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<bit_xor_equal>,
-    actor<T1>,
-    typename as_actor<T2>::type
-  >
->
-operator^=(const actor<T1> &_1, const T2 &_2)
-{
-  return compose(binary_operator<bit_xor_equal>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator|=()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<bit_xor_equal>,
-    actor<T1>,
-    actor<T2>
-  >
->
-operator^=(const actor<T1> &_1, const actor<T2> &_2)
-{
-  return compose(binary_operator<bit_xor_equal>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator|=()
-
-template<typename T>
-  struct bit_lshift_equal
-    : public thrust::binary_function<T&,T,T&>
-{
-  __host__ __device__ T& operator()(T &lhs, const T&rhs) const { return lhs <<= rhs; }
-}; // end bit_lshift_equal
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<bit_lshift_equal>,
-    actor<T1>,
-    typename as_actor<T2>::type
-  >
->
-operator<<=(const actor<T1> &_1, const T2 &_2)
-{
-  return compose(binary_operator<bit_lshift_equal>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator<<=()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<bit_lshift_equal>,
-    actor<T1>,
-    actor<T2>
-  >
->
-operator<<=(const actor<T1> &_1, const actor<T2> &_2)
-{
-  return compose(binary_operator<bit_lshift_equal>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator<<=()
-
-template<typename T>
-  struct bit_rshift_equal
-    : public thrust::binary_function<T&,T,T&>
-{
-  __host__ __device__ T& operator()(T &lhs, const T&rhs) const { return lhs >>= rhs; }
-}; // end bit_rshift_equal
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<bit_rshift_equal>,
-    actor<T1>,
-    typename as_actor<T2>::type
-  >
->
-operator>>=(const actor<T1> &_1, const T2 &_2)
-{
-  return compose(binary_operator<bit_rshift_equal>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator>>=()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<bit_rshift_equal>,
-    actor<T1>,
-    actor<T2>
-  >
->
-operator>>=(const actor<T1> &_1, const actor<T2> &_2)
-{
-  return compose(binary_operator<bit_rshift_equal>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator>>=()
-
-} // end functional
-} // end detail
-} // end thrust
-
diff --git a/compat/thrust/detail/functional/operators/logical_operators.h b/compat/thrust/detail/functional/operators/logical_operators.h
deleted file mode 100644
index 9c952620db..0000000000
--- a/compat/thrust/detail/functional/operators/logical_operators.h
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/functional/actor.h>
-#include <thrust/detail/functional/composite.h>
-#include <thrust/detail/functional/operators/operator_adaptors.h>
-#include <thrust/functional.h>
-
-namespace thrust
-{
-namespace detail
-{
-namespace functional
-{
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<thrust::logical_and>,
-    actor<T1>,
-    typename as_actor<T2>::type
-  >
->
-operator&&(const actor<T1> &_1, const T2 &_2)
-{
-  return compose(binary_operator<thrust::logical_and>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator&&()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<thrust::logical_and>,
-    typename as_actor<T1>::type,
-    actor<T2>
-  >
->
-operator&&(const T1 &_1, const actor<T2> &_2)
-{
-  return compose(binary_operator<thrust::logical_and>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator&&()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<thrust::logical_and>,
-    actor<T1>,
-    actor<T2>
-  >
->
-operator&&(const actor<T1> &_1, const actor<T2> &_2)
-{
-  return compose(binary_operator<thrust::logical_and>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator&&()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<thrust::logical_or>,
-    actor<T1>,
-    typename as_actor<T2>::type
-  >
->
-operator||(const actor<T1> &_1, const T2 &_2)
-{
-  return compose(binary_operator<thrust::logical_or>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator&&()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<thrust::logical_or>,
-    typename as_actor<T1>::type,
-    actor<T2>
-  >
->
-operator||(const T1 &_1, const actor<T2> &_2)
-{
-  return compose(binary_operator<thrust::logical_or>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator&&()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<thrust::logical_or>,
-    actor<T1>,
-    actor<T2>
-  >
->
-operator||(const actor<T1> &_1, const actor<T2> &_2)
-{
-  return compose(binary_operator<thrust::logical_or>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator&&()
-
-template<typename Eval>
-__host__ __device__
-actor<
-  composite<
-    unary_operator<thrust::logical_not>,
-    actor<Eval>
-  >
->
-operator!(const actor<Eval> &_1)
-{
-  return compose(unary_operator<thrust::logical_not>(), _1);
-} // end operator!()
-
-} // end functional
-} // end detail
-} // end thrust
-
diff --git a/compat/thrust/detail/functional/operators/operator_adaptors.h b/compat/thrust/detail/functional/operators/operator_adaptors.h
deleted file mode 100644
index d35fe9726b..0000000000
--- a/compat/thrust/detail/functional/operators/operator_adaptors.h
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/tuple.h>
-#include <thrust/detail/type_traits.h>
-
-namespace thrust
-{
-namespace detail
-{
-namespace functional
-{
-
-// this thing (which models Eval) is an adaptor for the unary
-// functors inside functional.h
-template<template<typename> class UnaryOperator>
-  struct unary_operator
-{
-  template<typename Env>
-    struct argument
-      : thrust::detail::eval_if<
-          (thrust::tuple_size<Env>::value == 0),
-          thrust::detail::identity_<thrust::null_type>,
-          thrust::tuple_element<0,Env>
-        >
-  {
-  };
-
-  template<typename Env>
-    struct operator_type
-  {
-    typedef UnaryOperator<
-      typename thrust::detail::remove_reference<
-        typename argument<Env>::type
-      >::type
-    > type;
-  };
-
-  template<typename Env>
-    struct result
-  {
-    typedef typename operator_type<Env>::type op_type;
-    typedef typename op_type::result_type type;
-  };
-
-  template<typename Env>
-  __host__ __device__
-  typename result<Env>::type eval(const Env &e) const
-  {
-    typename operator_type<Env>::type op;
-    return op(thrust::get<0>(e));
-  } // end eval()
-}; // end unary_operator
-
-// this thing (which models Eval) is an adaptor for the binary
-// functors inside functional.h
-template<template<typename> class BinaryOperator>
-  struct binary_operator
-{
-  template<typename Env>
-    struct first_argument
-      : thrust::detail::eval_if<
-          (thrust::tuple_size<Env>::value == 0),
-          thrust::detail::identity_<thrust::null_type>,
-          thrust::tuple_element<0,Env>
-        >
-  {
-  };
-
-  template<typename Env>
-    struct operator_type
-  {
-    typedef BinaryOperator<
-      typename thrust::detail::remove_reference<
-        typename first_argument<Env>::type
-      >::type
-    > type;
-  };
-
-  template<typename Env>
-    struct result
-  {
-    typedef typename operator_type<Env>::type op_type;
-    typedef typename op_type::result_type type;
-  };
-
-  template<typename Env>
-  __host__ __device__
-  typename result<Env>::type eval(const Env &e) const
-  {
-    typename operator_type<Env>::type op;
-    return op(thrust::get<0>(e), thrust::get<1>(e));
-  } // end eval()
-}; // end binary_operator
-
-} // end functional
-} // end detail
-} // end thrust
-
diff --git a/compat/thrust/detail/functional/operators/relational_operators.h b/compat/thrust/detail/functional/operators/relational_operators.h
deleted file mode 100644
index 6b26534430..0000000000
--- a/compat/thrust/detail/functional/operators/relational_operators.h
+++ /dev/null
@@ -1,323 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/functional/actor.h>
-#include <thrust/detail/functional/composite.h>
-#include <thrust/detail/functional/operators/operator_adaptors.h>
-#include <thrust/functional.h>
-
-namespace thrust
-{
-namespace detail
-{
-namespace functional
-{
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<thrust::equal_to>,
-    actor<T1>,
-    typename as_actor<T2>::type
-  >
->
-operator==(const actor<T1> &_1, const T2 &_2)
-{
-  return compose(binary_operator<thrust::equal_to>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator==()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<thrust::equal_to>,
-    typename as_actor<T1>::type,
-    actor<T2>
-  >
->
-operator==(const T1 &_1, const actor<T2> &_2)
-{
-  return compose(binary_operator<thrust::equal_to>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator==()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<thrust::equal_to>,
-    actor<T1>,
-    actor<T2>
-  >
->
-operator==(const actor<T1> &_1, const actor<T2> &_2)
-{
-  return compose(binary_operator<thrust::equal_to>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator==()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<thrust::not_equal_to>,
-    actor<T1>,
-    typename as_actor<T2>::type
-  >
->
-operator!=(const actor<T1> &_1, const T2 &_2)
-{
-  return compose(binary_operator<thrust::not_equal_to>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator!=()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<thrust::not_equal_to>,
-    typename as_actor<T1>::type,
-    actor<T2>
-  >
->
-operator!=(const T1 &_1, const actor<T2> &_2)
-{
-  return compose(binary_operator<thrust::not_equal_to>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator!=()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<thrust::not_equal_to>,
-    actor<T1>,
-    actor<T2>
-  >
->
-operator!=(const actor<T1> &_1, const actor<T2> &_2)
-{
-  return compose(binary_operator<thrust::not_equal_to>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator!=()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<thrust::greater>,
-    actor<T1>,
-    typename as_actor<T2>::type
-  >
->
-operator>(const actor<T1> &_1, const T2 &_2)
-{
-  return compose(binary_operator<thrust::greater>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator>()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<thrust::greater>,
-    typename as_actor<T1>::type,
-    actor<T2>
-  >
->
-operator>(const T1 &_1, const actor<T2> &_2)
-{
-  return compose(binary_operator<thrust::greater>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator>()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<thrust::greater>,
-    actor<T1>,
-    actor<T2>
-  >
->
-operator>(const actor<T1> &_1, const actor<T2> &_2)
-{
-  return compose(binary_operator<thrust::greater>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator>()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<thrust::less>,
-    actor<T1>,
-    typename as_actor<T2>::type
-  >
->
-operator<(const actor<T1> &_1, const T2 &_2)
-{
-  return compose(binary_operator<thrust::less>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator<()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<thrust::less>,
-    typename as_actor<T1>::type,
-    actor<T2>
-  >
->
-operator<(const T1 &_1, const actor<T2> &_2)
-{
-  return compose(binary_operator<thrust::less>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator<()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<thrust::less>,
-    actor<T1>,
-    actor<T2>
-  >
->
-operator<(const actor<T1> &_1, const actor<T2> &_2)
-{
-  return compose(binary_operator<thrust::less>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator<()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<thrust::greater_equal>,
-    actor<T1>,
-    typename as_actor<T2>::type
-  >
->
-operator>=(const actor<T1> &_1, const T2 &_2)
-{
-  return compose(binary_operator<thrust::greater_equal>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator>=()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<thrust::greater_equal>,
-    typename as_actor<T1>::type,
-    actor<T2>
-  >
->
-operator>=(const T1 &_1, const actor<T2> &_2)
-{
-  return compose(binary_operator<thrust::greater_equal>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator>=()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<thrust::greater_equal>,
-    actor<T1>,
-    actor<T2>
-  >
->
-operator>=(const actor<T1> &_1, const actor<T2> &_2)
-{
-  return compose(binary_operator<thrust::greater_equal>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator>=()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<thrust::less_equal>,
-    actor<T1>,
-    typename as_actor<T2>::type
-  >
->
-operator<=(const actor<T1> &_1, const T2 &_2)
-{
-  return compose(binary_operator<thrust::less_equal>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator<=()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<thrust::less_equal>,
-    typename as_actor<T1>::type,
-    actor<T2>
-  >
->
-operator<=(const T1 &_1, const actor<T2> &_2)
-{
-  return compose(binary_operator<thrust::less_equal>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator<=()
-
-template<typename T1, typename T2>
-__host__ __device__
-actor<
-  composite<
-    binary_operator<thrust::less_equal>,
-    actor<T1>,
-    actor<T2>
-  >
->
-operator<=(const actor<T1> &_1, const actor<T2> &_2)
-{
-  return compose(binary_operator<thrust::less_equal>(),
-                 make_actor(_1),
-                 make_actor(_2));
-} // end operator<=()
-
-} // end functional
-} // end detail
-} // end thrust
-
diff --git a/compat/thrust/detail/functional/placeholder.h b/compat/thrust/detail/functional/placeholder.h
deleted file mode 100644
index 9acf6da803..0000000000
--- a/compat/thrust/detail/functional/placeholder.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/functional/actor.h>
-#include <thrust/detail/functional/argument.h>
-
-namespace thrust
-{
-namespace detail
-{
-namespace functional
-{
-
-template<unsigned int i>
-  struct placeholder
-{
-  typedef actor<argument<i> > type;
-};
-
-} // end functional
-} // end detail
-} // end thrust
-
diff --git a/compat/thrust/detail/functional/value.h b/compat/thrust/detail/functional/value.h
deleted file mode 100644
index 27e2802e3a..0000000000
--- a/compat/thrust/detail/functional/value.h
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-// Portions of this code are derived from
-//
-// Manjunath Kudlur's Carbon library
-//
-// and
-//
-// Based on Boost.Phoenix v1.2
-// Copyright (c) 2001-2002 Joel de Guzman
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/functional/actor.h>
-
-namespace thrust
-{
-namespace detail
-{
-namespace functional
-{
-
-
-template<typename Eval> struct actor;
-
-
-template<typename T>
-  class value
-{
-  public:
-
-    template<typename Env>
-      struct result
-    {
-      typedef T type;
-    };
-
-    __host__ __device__
-    value(const T &arg)
-      : m_val(arg)
-    {}
-
-    template<typename Env>
-    __host__ __device__
-      T eval(const Env &) const
-    {
-      return m_val;
-    }
-
-  private:
-    T m_val;
-}; // end value
-
-template<typename T>
-__host__ __device__
-actor<value<T> > val(const T &x)
-{
-  return value<T>(x);
-} // end val()
-
-
-} // end functional
-} // end detail
-} // end thrust
-
diff --git a/compat/thrust/detail/gather.inl b/compat/thrust/detail/gather.inl
deleted file mode 100644
index 4edecd038a..0000000000
--- a/compat/thrust/detail/gather.inl
+++ /dev/null
@@ -1,160 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file gather.inl
- *  \brief Inline file for gather.h.
- */
-
-#include <thrust/gather.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/system/detail/generic/select_system.h>
-#include <thrust/system/detail/generic/gather.h>
-#include <thrust/system/detail/adl/gather.h>
-
-namespace thrust
-{
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename RandomAccessIterator,
-         typename OutputIterator>
-  OutputIterator gather(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                        InputIterator                                               map_first,
-                        InputIterator                                               map_last,
-                        RandomAccessIterator                                        input_first,
-                        OutputIterator                                              result)
-{
-  using thrust::system::detail::generic::gather;
-  return gather(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), map_first, map_last, input_first, result);
-} // end gather()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename RandomAccessIterator,
-         typename OutputIterator>
-  OutputIterator gather_if(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                           InputIterator1                                              map_first,
-                           InputIterator1                                              map_last,
-                           InputIterator2                                              stencil,
-                           RandomAccessIterator                                        input_first,
-                           OutputIterator                                              result)
-{
-  using thrust::system::detail::generic::gather_if;
-  return gather_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), map_first, map_last, stencil, input_first, result);
-} // end gather_if()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename RandomAccessIterator,
-         typename OutputIterator,
-         typename Predicate>
-  OutputIterator gather_if(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                           InputIterator1                                              map_first,
-                           InputIterator1                                              map_last,
-                           InputIterator2                                              stencil,
-                           RandomAccessIterator                                        input_first,
-                           OutputIterator                                              result,
-                           Predicate                                                   pred)
-{
-  using thrust::system::detail::generic::gather_if;
-  return gather_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), map_first, map_last, stencil, input_first, result, pred);
-} // end gather_if()
-
-
-template<typename InputIterator,
-         typename RandomAccessIterator,
-         typename OutputIterator>
-  OutputIterator gather(InputIterator        map_first,
-                        InputIterator        map_last,
-                        RandomAccessIterator input_first,
-                        OutputIterator       result)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator>::type        System1; 
-  typedef typename thrust::iterator_system<RandomAccessIterator>::type System2; 
-  typedef typename thrust::iterator_system<OutputIterator>::type       System3; 
-
-  System1 system1;
-  System2 system2;
-  System3 system3;
-
-  return thrust::gather(select_system(system1,system2,system3), map_first, map_last, input_first, result);
-} // end gather()
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename RandomAccessIterator,
-         typename OutputIterator>
-  OutputIterator gather_if(InputIterator1       map_first,
-                           InputIterator1       map_last,
-                           InputIterator2       stencil,
-                           RandomAccessIterator input_first,
-                           OutputIterator       result)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type       System1; 
-  typedef typename thrust::iterator_system<InputIterator2>::type       System2; 
-  typedef typename thrust::iterator_system<RandomAccessIterator>::type System3; 
-  typedef typename thrust::iterator_system<OutputIterator>::type       System4; 
-
-  System1 system1;
-  System2 system2;
-  System3 system3;
-  System4 system4;
-
-  return thrust::gather_if(select_system(system1,system2,system3,system4), map_first, map_last, stencil, input_first, result);
-} // end gather_if()
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename RandomAccessIterator,
-         typename OutputIterator,
-         typename Predicate>
-  OutputIterator gather_if(InputIterator1       map_first,
-                           InputIterator1       map_last,
-                           InputIterator2       stencil,
-                           RandomAccessIterator input_first,
-                           OutputIterator       result,
-                           Predicate            pred)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type       System1; 
-  typedef typename thrust::iterator_system<InputIterator2>::type       System2; 
-  typedef typename thrust::iterator_system<RandomAccessIterator>::type System3; 
-  typedef typename thrust::iterator_system<OutputIterator>::type       System4; 
-
-  System1 system1;
-  System2 system2;
-  System3 system3;
-  System4 system4;
-
-  return thrust::gather_if(select_system(system1,system2,system3,system4), map_first, map_last, stencil, input_first, result, pred);
-} // end gather_if()
-
-
-} // end namespace thrust
-
diff --git a/compat/thrust/detail/generate.inl b/compat/thrust/detail/generate.inl
deleted file mode 100644
index c12580452e..0000000000
--- a/compat/thrust/detail/generate.inl
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file generate.inl
- *  \author Jared Hoberock
- *  \brief Inline file for generate.h.
- */
-
-#include <thrust/generate.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/system/detail/generic/select_system.h>
-#include <thrust/system/detail/generic/generate.h>
-#include <thrust/system/detail/adl/generate.h>
-
-namespace thrust
-{
-
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename Generator>
-  void generate(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                ForwardIterator first,
-                ForwardIterator last,
-                Generator gen)
-{
-  using thrust::system::detail::generic::generate;
-  return generate(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, gen);
-} // end generate()
-
-
-template<typename DerivedPolicy,
-         typename OutputIterator,
-         typename Size,
-         typename Generator>
-  OutputIterator generate_n(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                            OutputIterator first,
-                            Size n,
-                            Generator gen)
-{
-  using thrust::system::detail::generic::generate_n;
-  return generate_n(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, n, gen);
-} // end generate_n()
-
-
-template<typename ForwardIterator,
-         typename Generator>
-  void generate(ForwardIterator first,
-                ForwardIterator last,
-                Generator gen)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<ForwardIterator>::type System;
-
-  System system;
-
-  return thrust::generate(select_system(system), first, last, gen);
-} // end generate()
-
-
-template<typename OutputIterator,
-         typename Size,
-         typename Generator>
-  OutputIterator generate_n(OutputIterator first,
-                            Size n,
-                            Generator gen)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<OutputIterator>::type System;
-
-  System system;
-
-  return thrust::generate_n(select_system(system), first, n, gen);
-} // end generate_n()
-
-
-} // end thrust
-
diff --git a/compat/thrust/detail/host_vector.inl b/compat/thrust/detail/host_vector.inl
deleted file mode 100644
index e5c60ab973..0000000000
--- a/compat/thrust/detail/host_vector.inl
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file host_vector.inl
- *  \brief Inline file for host_vector.h.
- */
-
-#include <thrust/host_vector.h>
-
-namespace thrust
-{
-
-template<typename T, typename Alloc>
-  template<typename OtherT, typename OtherAlloc>
-    host_vector<T,Alloc>
-      ::host_vector(const device_vector<OtherT,OtherAlloc> &v)
-        :Parent(v)
-{
-  ;
-} // end host_vector::host_vector()
-
-} // end namespace thrust
-
diff --git a/compat/thrust/detail/inner_product.inl b/compat/thrust/detail/inner_product.inl
deleted file mode 100644
index f7773d8d2c..0000000000
--- a/compat/thrust/detail/inner_product.inl
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file inner_product.inl
- *  \brief Inline file for inner_product.h.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/inner_product.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/system/detail/generic/select_system.h>
-#include <thrust/system/detail/generic/inner_product.h>
-#include <thrust/system/detail/adl/inner_product.h>
-
-namespace thrust
-{
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputType>
-OutputType inner_product(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                         InputIterator1 first1,
-                         InputIterator1 last1,
-                         InputIterator2 first2,
-                         OutputType init)
-{
-  using thrust::system::detail::generic::inner_product;
-  return inner_product(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, init);
-} // end inner_product()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputType,
-         typename BinaryFunction1,
-         typename BinaryFunction2>
-OutputType inner_product(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                         InputIterator1 first1,
-                         InputIterator1 last1,
-                         InputIterator2 first2,
-                         OutputType init, 
-                         BinaryFunction1 binary_op1,
-                         BinaryFunction2 binary_op2)
-{
-  using thrust::system::detail::generic::inner_product;
-  return inner_product(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, init, binary_op1, binary_op2);
-} // end inner_product()
-
-
-template <typename InputIterator1, typename InputIterator2, typename OutputType>
-OutputType 
-inner_product(InputIterator1 first1, InputIterator1 last1,
-              InputIterator2 first2, OutputType init)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type System1;
-  typedef typename thrust::iterator_system<InputIterator2>::type System2;
-
-  System1 system1;
-  System2 system2;
-
-  return thrust::inner_product(select_system(system1,system2), first1, last1, first2, init);
-} // end inner_product()
-
-
-template <typename InputIterator1, typename InputIterator2, typename OutputType,
-          typename BinaryFunction1, typename BinaryFunction2>
-OutputType
-inner_product(InputIterator1 first1, InputIterator1 last1,
-              InputIterator2 first2, OutputType init, 
-              BinaryFunction1 binary_op1, BinaryFunction2 binary_op2)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type System1;
-  typedef typename thrust::iterator_system<InputIterator2>::type System2;
-
-  System1 system1;
-  System2 system2;
-
-  return thrust::inner_product(select_system(system1,system2), first1, last1, first2, init, binary_op1, binary_op2);
-} // end inner_product()
-
-
-} // end namespace thrust
-
diff --git a/compat/thrust/detail/integer_traits.h b/compat/thrust/detail/integer_traits.h
deleted file mode 100644
index e4cf5d159f..0000000000
--- a/compat/thrust/detail/integer_traits.h
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <limits>
-#include <limits.h>
-
-namespace thrust
-{
-
-namespace detail
-{
-
-template<typename T>
-  class integer_traits
-{
-  public:
-    static const bool is_integral = false;
-};
-
-template<typename T, T min_val, T max_val>
-  class integer_traits_base
-{
-  public:
-    static const bool is_integral = true;
-    static const T const_min = min_val;
-    static const T const_max = max_val;
-};
-
-
-template<>
-  class integer_traits<bool>
-    : public std::numeric_limits<bool>,
-      public integer_traits_base<bool, false, true>
-{};
-
-
-template<>
-  class integer_traits<char>
-    : public std::numeric_limits<char>,
-      public integer_traits_base<char, CHAR_MIN, CHAR_MAX>
-{};
-
-
-template<>
-  class integer_traits<signed char>
-    : public std::numeric_limits<signed char>,
-      public integer_traits_base<signed char, SCHAR_MIN, SCHAR_MAX>
-{};
-
-
-template<>
-  class integer_traits<unsigned char>
-    : public std::numeric_limits<unsigned char>,
-      public integer_traits_base<unsigned char, 0, UCHAR_MAX>
-{};
-
-
-template<>
-  class integer_traits<short>
-    : public std::numeric_limits<short>,
-      public integer_traits_base<short, SHRT_MIN, SHRT_MAX>
-{};
-
-
-template<>
-  class integer_traits<unsigned short>
-    : public std::numeric_limits<unsigned short>,
-      public integer_traits_base<unsigned short, 0, USHRT_MAX>
-{};
-
-
-template<>
-  class integer_traits<int>
-    : public std::numeric_limits<int>,
-      public integer_traits_base<int, INT_MIN, INT_MAX>
-{};
-
-
-template<>
-  class integer_traits<unsigned int>
-    : public std::numeric_limits<unsigned int>,
-      public integer_traits_base<unsigned int, 0, UINT_MAX>
-{};
-
-
-template<>
-  class integer_traits<long>
-    : public std::numeric_limits<long>,
-      public integer_traits_base<long, LONG_MIN, LONG_MAX>
-{};
-
-
-template<>
-  class integer_traits<unsigned long>
-    : public std::numeric_limits<unsigned long>,
-      public integer_traits_base<unsigned long, 0, ULONG_MAX>
-{};
-
-
-template<>
-  class integer_traits<long long>
-    : public std::numeric_limits<long long>,
-      public integer_traits_base<long long, LLONG_MIN, LLONG_MAX>
-{};
-
-
-template<>
-  class integer_traits<unsigned long long>
-    : public std::numeric_limits<unsigned long long>,
-      public integer_traits_base<unsigned long long, 0, ULLONG_MAX>
-{};
-
-} // end detail
-
-} // end thrust
-
diff --git a/compat/thrust/detail/internal_functional.h b/compat/thrust/detail/internal_functional.h
deleted file mode 100644
index 6d5264ae35..0000000000
--- a/compat/thrust/detail/internal_functional.h
+++ /dev/null
@@ -1,678 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file internal_functional.inl
- *  \brief Non-public functionals used to implement algorithm internals.
- */
-
-#pragma once
-
-#include <thrust/tuple.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/iterator/detail/tuple_of_iterator_references.h>
-#include <thrust/detail/raw_reference_cast.h>
-#include <memory> // for ::new
-
-namespace thrust
-{
-namespace detail
-{
-
-// unary_negate does not need to know argument_type
-template <typename Predicate>
-struct unary_negate
-{
-    typedef bool result_type;
-
-    Predicate pred;
-
-    __host__ __device__
-    explicit unary_negate(const Predicate& pred) : pred(pred) {}
-
-    template <typename T>
-    __host__ __device__
-    bool operator()(const T& x)
-    {
-        return !bool(pred(x));
-    }
-};
-
-// binary_negate does not need to know first_argument_type or second_argument_type
-template <typename Predicate>
-struct binary_negate
-{
-    typedef bool result_type;
-
-    Predicate pred;
-
-    __host__ __device__
-    explicit binary_negate(const Predicate& pred) : pred(pred) {}
-
-    template <typename T1, typename T2>
-        __host__ __device__
-        bool operator()(const T1& x, const T2& y)
-        {
-            return !bool(pred(x,y));
-        }
-};
-
-template<typename Predicate>
-  __host__ __device__
-  thrust::detail::unary_negate<Predicate> not1(const Predicate &pred)
-{
-    return thrust::detail::unary_negate<Predicate>(pred);
-}
-
-template<typename Predicate>
-  __host__ __device__
-  thrust::detail::binary_negate<Predicate> not2(const Predicate &pred)
-{
-    return thrust::detail::binary_negate<Predicate>(pred);
-}
-
-
-// convert a predicate to a 0 or 1 integral value
-template <typename Predicate, typename IntegralType>
-struct predicate_to_integral
-{
-    Predicate pred;
-
-    __host__ __device__
-    explicit predicate_to_integral(const Predicate& pred) : pred(pred) {}
-
-    template <typename T>
-        __host__ __device__
-        bool operator()(const T& x)
-        {
-            return pred(x) ? IntegralType(1) : IntegralType(0);
-        }
-};
-
-
-// note that detail::equal_to does not force conversion from T2 -> T1 as equal_to does
-template <typename T1>
-struct equal_to
-{
-    typedef bool result_type;
-
-    template <typename T2>
-        __host__ __device__
-        bool operator()(const T1& lhs, const T2& rhs) const
-        {
-            return lhs == rhs;
-        }
-};
-
-// note that equal_to_value does not force conversion from T2 -> T1 as equal_to does
-template <typename T2>
-struct equal_to_value
-{
-    T2 rhs;
-
-    equal_to_value(const T2& rhs) : rhs(rhs) {}
-
-    template <typename T1>
-        __host__ __device__
-        bool operator()(const T1& lhs) const
-        {
-            return lhs == rhs;
-        }
-};
-
-template <typename Predicate>
-struct tuple_binary_predicate
-{
-    typedef bool result_type;
-
-    __host__ __device__
-        tuple_binary_predicate(const Predicate& p) : pred(p) {}
-
-    template<typename Tuple>
-        __host__ __device__
-        bool operator()(const Tuple& t) const
-        { 
-            return pred(thrust::get<0>(t), thrust::get<1>(t));
-        }
-
-    Predicate pred;
-};
-
-template <typename Predicate>
-struct tuple_not_binary_predicate
-{
-    typedef bool result_type;
-
-    __host__ __device__
-        tuple_not_binary_predicate(const Predicate& p) : pred(p) {}
-
-    template<typename Tuple>
-        __host__ __device__
-        bool operator()(const Tuple& t) const
-        { 
-            return !pred(thrust::get<0>(t), thrust::get<1>(t));
-        }
-
-    Predicate pred;
-};
-
-template<typename Generator>
-  struct host_generate_functor
-{
-  typedef void result_type;
-
-  __host__ __device__
-  host_generate_functor(Generator g)
-    : gen(g) {}
-
-  // operator() does not take an lvalue reference because some iterators
-  // produce temporary proxy references when dereferenced. for example,
-  // consider the temporary tuple of references produced by zip_iterator.
-  // such temporaries cannot bind to an lvalue reference.
-  //
-  // to WAR this, accept a const reference (which is bindable to a temporary),
-  // and const_cast in the implementation.
-  //
-  // XXX change to an rvalue reference upon c++0x (which either a named variable
-  //     or temporary can bind to)
-  template<typename T>
-  __host__
-  void operator()(const T &x)
-  {
-    // we have to be naughty and const_cast this to get it to work
-    T &lvalue = const_cast<T&>(x);
-
-    // this assigns correctly whether x is a true reference or proxy
-    lvalue = gen();
-  }
-
-  Generator gen;
-};
-
-template<typename Generator>
-  struct device_generate_functor
-{
-  typedef void result_type;
-
-  __host__ __device__
-  device_generate_functor(Generator g)
-    : gen(g) {}
-
-  // operator() does not take an lvalue reference because some iterators
-  // produce temporary proxy references when dereferenced. for example,
-  // consider the temporary tuple of references produced by zip_iterator.
-  // such temporaries cannot bind to an lvalue reference.
-  //
-  // to WAR this, accept a const reference (which is bindable to a temporary),
-  // and const_cast in the implementation.
-  //
-  // XXX change to an rvalue reference upon c++0x (which either a named variable
-  //     or temporary can bind to)
-  template<typename T>
-  __host__ __device__
-  void operator()(const T &x)
-  {
-    // we have to be naughty and const_cast this to get it to work
-    T &lvalue = const_cast<T&>(x);
-
-    // this assigns correctly whether x is a true reference or proxy
-    lvalue = gen();
-  }
-
-  Generator gen;
-};
-
-template<typename System, typename Generator>
-  struct generate_functor
-    : thrust::detail::eval_if<
-        thrust::detail::is_convertible<System, thrust::host_system_tag>::value,
-        thrust::detail::identity_<host_generate_functor<Generator> >,
-        thrust::detail::identity_<device_generate_functor<Generator> >
-      >
-{};
-
-
-template<typename ResultType, typename BinaryFunction>
-  struct zipped_binary_op
-{
-  typedef ResultType result_type;
-
-  __host__ __device__
-  zipped_binary_op(BinaryFunction binary_op)
-    : m_binary_op(binary_op) {}
-
-  template<typename Tuple>
-  __host__ __device__
-  inline result_type operator()(Tuple t)
-  {
-    return m_binary_op(thrust::get<0>(t), thrust::get<1>(t));
-  }
-
-  BinaryFunction m_binary_op;
-};
-
-
-template<typename T>
-  struct is_non_const_reference
-    : thrust::detail::and_<
-        thrust::detail::not_<thrust::detail::is_const<T> >,
-        thrust::detail::is_reference<T>
-      >
-{};
-
-template<typename T> struct is_tuple_of_iterator_references : thrust::detail::false_type {};
-
-template<typename T1, typename T2, typename T3,
-         typename T4, typename T5, typename T6,
-         typename T7, typename T8, typename T9,
-         typename T10>
-  struct is_tuple_of_iterator_references<
-    thrust::detail::tuple_of_iterator_references<
-      T1,T2,T3,T4,T5,T6,T7,T8,T9,T10
-    >
-  >
-    : thrust::detail::true_type
-{};
-
-// use this enable_if to avoid assigning to temporaries in the transform functors below
-// XXX revisit this problem with c++11 perfect forwarding
-template<typename T>
-  struct enable_if_non_const_reference_or_tuple_of_iterator_references
-    : thrust::detail::enable_if<
-        is_non_const_reference<T>::value || is_tuple_of_iterator_references<T>::value
-      >
-{};
-
-
-template<typename UnaryFunction>
-  struct host_unary_transform_functor
-{
-  typedef void result_type;
-
-  UnaryFunction f;
-
-  host_unary_transform_functor(UnaryFunction f_)
-    :f(f_) {}
-
-  template<typename Tuple>
-  inline __host__
-  typename enable_if_non_const_reference_or_tuple_of_iterator_references<
-    typename thrust::tuple_element<1,Tuple>::type
-  >::type
-    operator()(Tuple t)
-  {
-    thrust::get<1>(t) = f(thrust::get<0>(t));
-  }
-};
-
-template<typename UnaryFunction>
-  struct device_unary_transform_functor
-{
-  typedef void result_type;
-
-  UnaryFunction f;
-
-  device_unary_transform_functor(UnaryFunction f_)
-    :f(f_) {}
-
-  // add __host__ to allow the omp backend compile with nvcc
-  template<typename Tuple>
-  inline __host__ __device__
-  typename enable_if_non_const_reference_or_tuple_of_iterator_references<
-    typename thrust::tuple_element<1,Tuple>::type
-  >::type
-    operator()(Tuple t)
-  {
-    thrust::get<1>(t) = f(thrust::get<0>(t));
-  }
-};
-
-
-template<typename System, typename UnaryFunction>
-  struct unary_transform_functor
-    : thrust::detail::eval_if<
-        thrust::detail::is_convertible<System, thrust::host_system_tag>::value,
-        thrust::detail::identity_<host_unary_transform_functor<UnaryFunction> >,
-        thrust::detail::identity_<device_unary_transform_functor<UnaryFunction> >
-      >
-{};
-
-
-template <typename BinaryFunction>
-  struct host_binary_transform_functor
-{
-  BinaryFunction f;
-
-  host_binary_transform_functor(BinaryFunction f_)
-    :f(f_)
-  {}
-
-  template <typename Tuple>
-  __host__
-  void operator()(Tuple t)
-  { 
-    thrust::get<2>(t) = f(thrust::get<0>(t), thrust::get<1>(t));
-  }
-}; // end binary_transform_functor
-
-
-template <typename BinaryFunction>
-  struct device_binary_transform_functor
-{
-  BinaryFunction f;
-
-  device_binary_transform_functor(BinaryFunction f_)
-    :f(f_)
-  {}
-
-  // add __host__ to allow the omp backend compile with nvcc
-  template <typename Tuple>
-  inline __host__ __device__
-  typename enable_if_non_const_reference_or_tuple_of_iterator_references<
-    typename thrust::tuple_element<2,Tuple>::type
-  >::type
-    operator()(Tuple t)
-  { 
-    thrust::get<2>(t) = f(thrust::get<0>(t), thrust::get<1>(t));
-  }
-}; // end binary_transform_functor
-
-
-template<typename System, typename BinaryFunction>
-  struct binary_transform_functor
-    : thrust::detail::eval_if<
-        thrust::detail::is_convertible<System, thrust::host_system_tag>::value,
-        thrust::detail::identity_<host_binary_transform_functor<BinaryFunction> >,
-        thrust::detail::identity_<device_binary_transform_functor<BinaryFunction> >
-      >
-{};
-
-
-template <typename UnaryFunction, typename Predicate>
-struct host_unary_transform_if_functor
-{
-  UnaryFunction unary_op;
-  Predicate pred;
-
-  host_unary_transform_if_functor(UnaryFunction unary_op_, Predicate pred_)
-    : unary_op(unary_op_), pred(pred_) {}
-
-  template<typename Tuple>
-  inline __host__
-  typename enable_if_non_const_reference_or_tuple_of_iterator_references<
-    typename thrust::tuple_element<1,Tuple>::type
-  >::type
-    operator()(Tuple t)
-  {
-    if(pred(thrust::get<0>(t)))
-    {
-      thrust::get<1>(t) = unary_op(thrust::get<0>(t));
-    }
-  }
-}; // end host_unary_transform_if_functor
-
-
-template <typename UnaryFunction, typename Predicate>
-struct device_unary_transform_if_functor
-{
-  UnaryFunction unary_op;
-  Predicate pred;
-
-  device_unary_transform_if_functor(UnaryFunction unary_op_, Predicate pred_)
-    : unary_op(unary_op_), pred(pred_) {}
-
-  template<typename Tuple>
-  inline __host__ __device__
-  typename enable_if_non_const_reference_or_tuple_of_iterator_references<
-    typename thrust::tuple_element<1,Tuple>::type
-  >::type
-    operator()(Tuple t)
-  {
-    if(pred(thrust::get<0>(t)))
-    {
-      thrust::get<1>(t) = unary_op(thrust::get<0>(t));
-    }
-  }
-}; // end device_unary_transform_if_functor
-
-
-template<typename System, typename UnaryFunction, typename Predicate>
-  struct unary_transform_if_functor
-    : thrust::detail::eval_if<
-        thrust::detail::is_convertible<System, thrust::host_system_tag>::value,
-        thrust::detail::identity_<host_unary_transform_if_functor<UnaryFunction,Predicate> >,
-        thrust::detail::identity_<device_unary_transform_if_functor<UnaryFunction,Predicate> >
-      >
-{};
-
-
-template <typename UnaryFunction, typename Predicate>
-struct host_unary_transform_if_with_stencil_functor
-{
-  UnaryFunction unary_op;
-  Predicate pred;
-  
-  host_unary_transform_if_with_stencil_functor(UnaryFunction _unary_op, Predicate _pred)
-    : unary_op(_unary_op), pred(_pred) {} 
-  
-  template <typename Tuple>
-  inline __host__
-  typename enable_if_non_const_reference_or_tuple_of_iterator_references<
-    typename thrust::tuple_element<2,Tuple>::type
-  >::type
-    operator()(Tuple t)
-  {
-    if(pred(thrust::get<1>(t)))
-      thrust::get<2>(t) = unary_op(thrust::get<0>(t));
-  }
-}; // end host_unary_transform_if_with_stencil_functor
-
-
-template <typename UnaryFunction, typename Predicate>
-struct device_unary_transform_if_with_stencil_functor
-{
-  UnaryFunction unary_op;
-  Predicate pred;
-  
-  device_unary_transform_if_with_stencil_functor(UnaryFunction _unary_op, Predicate _pred)
-    : unary_op(_unary_op), pred(_pred) {} 
-  
-  // add __host__ to allow the omp backend compile with nvcc
-  template <typename Tuple>
-  inline __host__ __device__
-  typename enable_if_non_const_reference_or_tuple_of_iterator_references<
-    typename thrust::tuple_element<2,Tuple>::type
-  >::type
-    operator()(Tuple t)
-  {
-    if(pred(thrust::get<1>(t)))
-      thrust::get<2>(t) = unary_op(thrust::get<0>(t));
-  }
-}; // end device_unary_transform_if_with_stencil_functor
-
-
-template<typename System, typename UnaryFunction, typename Predicate>
-  struct unary_transform_if_with_stencil_functor
-    : thrust::detail::eval_if<
-        thrust::detail::is_convertible<System, thrust::host_system_tag>::value,
-        thrust::detail::identity_<host_unary_transform_if_with_stencil_functor<UnaryFunction,Predicate> >,
-        thrust::detail::identity_<device_unary_transform_if_with_stencil_functor<UnaryFunction,Predicate> >
-      >
-{};
-
-
-template <typename BinaryFunction, typename Predicate>
-struct host_binary_transform_if_functor
-{
-  BinaryFunction binary_op;
-  Predicate pred;
-
-  host_binary_transform_if_functor(BinaryFunction _binary_op, Predicate _pred)
-    : binary_op(_binary_op), pred(_pred) {} 
-
-  template <typename Tuple>
-  inline __host__
-  typename enable_if_non_const_reference_or_tuple_of_iterator_references<
-    typename thrust::tuple_element<3,Tuple>::type
-  >::type
-    operator()(Tuple t)
-  {
-    if(pred(thrust::get<2>(t)))
-      thrust::get<3>(t) = binary_op(thrust::get<0>(t), thrust::get<1>(t));
-  }
-}; // end host_binary_transform_if_functor
-
-
-template <typename BinaryFunction, typename Predicate>
-struct device_binary_transform_if_functor
-{
-  BinaryFunction binary_op;
-  Predicate pred;
-
-  device_binary_transform_if_functor(BinaryFunction _binary_op, Predicate _pred)
-    : binary_op(_binary_op), pred(_pred) {} 
-
-  // add __host__ to allow the omp backend compile with nvcc
-  template <typename Tuple>
-  inline __host__ __device__
-  typename enable_if_non_const_reference_or_tuple_of_iterator_references<
-    typename thrust::tuple_element<3,Tuple>::type
-  >::type
-    operator()(Tuple t)
-  {
-    if(pred(thrust::get<2>(t)))
-      thrust::get<3>(t) = binary_op(thrust::get<0>(t), thrust::get<1>(t));
-  }
-}; // end device_binary_transform_if_functor
-
-
-template<typename System, typename BinaryFunction, typename Predicate>
-  struct binary_transform_if_functor
-    : thrust::detail::eval_if<
-        thrust::detail::is_convertible<System, thrust::host_system_tag>::value,
-        thrust::detail::identity_<host_binary_transform_if_functor<BinaryFunction,Predicate> >,
-        thrust::detail::identity_<device_binary_transform_if_functor<BinaryFunction,Predicate> >
-      >
-{};
-
-
-template<typename T>
-  struct host_destroy_functor
-{
-  __host__
-  void operator()(T &x) const
-  {
-    x.~T();
-  } // end operator()()
-}; // end host_destroy_functor
-
-
-template<typename T>
-  struct device_destroy_functor
-{
-  // add __host__ to allow the omp backend to compile with nvcc
-  __host__ __device__
-  void operator()(T &x) const
-  {
-    x.~T();
-  } // end operator()()
-}; // end device_destroy_functor
-
-
-template<typename System, typename T>
-  struct destroy_functor
-    : thrust::detail::eval_if<
-        thrust::detail::is_convertible<System, thrust::host_system_tag>::value,
-        thrust::detail::identity_<host_destroy_functor<T> >,
-        thrust::detail::identity_<device_destroy_functor<T> >
-      >
-{};
-
-
-template <typename T>
-struct fill_functor
-{
-  const T exemplar;
-
-  fill_functor(const T& _exemplar) 
-    : exemplar(_exemplar) {}
-
-  __host__ __device__
-  T operator()(void) const
-  { 
-    return exemplar;
-  }
-};
-
-
-template<typename T>
-  struct uninitialized_fill_functor
-{
-  T exemplar;
-
-  uninitialized_fill_functor(T x):exemplar(x){}
-
-  __host__ __device__
-  void operator()(T &x)
-  {
-    ::new(static_cast<void*>(&x)) T(exemplar);
-  } // end operator()()
-}; // end uninitialized_fill_functor
-
-
-// this predicate tests two two-element tuples
-// we first use a Compare for the first element
-// if the first elements are equivalent, we use
-// < for the second elements
-template<typename Compare>
-  struct compare_first_less_second
-{
-  compare_first_less_second(Compare c)
-    : comp(c) {}
-
-  template<typename T1, typename T2>
-  __host__ __device__
-  bool operator()(T1 lhs, T2 rhs)
-  {
-    return comp(thrust::get<0>(lhs), thrust::get<0>(rhs)) || (!comp(thrust::get<0>(rhs), thrust::get<0>(lhs)) && thrust::get<1>(lhs) < thrust::get<1>(rhs));
-  }
-
-  Compare comp;
-}; // end compare_first_less_second
-
-
-template<typename Compare>
-  struct compare_first
-{
-  Compare comp;
-
-  compare_first(Compare comp)
-    : comp(comp)
-  {}
-
-  template<typename Tuple1, typename Tuple2>
-  __host__ __device__
-  bool operator()(const Tuple1 &x, const Tuple2 &y)
-  {
-    return comp(thrust::raw_reference_cast(thrust::get<0>(x)), thrust::raw_reference_cast(thrust::get<0>(y)));
-  }
-}; // end compare_first
-
-
-} // end namespace detail
-} // end namespace thrust
-
diff --git a/compat/thrust/detail/logical.inl b/compat/thrust/detail/logical.inl
deleted file mode 100644
index 126a3e3fb1..0000000000
--- a/compat/thrust/detail/logical.inl
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file logical.inl
- *  \brief Inline file for logical.h.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/system/detail/generic/select_system.h>
-#include <thrust/system/detail/generic/logical.h>
-#include <thrust/system/detail/adl/logical.h>
-
-namespace thrust
-{
-
-
-template <typename DerivedPolicy, typename InputIterator, typename Predicate>
-bool all_of(const thrust::detail::execution_policy_base<DerivedPolicy> &exec, InputIterator first, InputIterator last, Predicate pred)
-{
-  using thrust::system::detail::generic::all_of;
-  return all_of(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, pred);
-} // end all_of()
-
-
-template <typename DerivedPolicy, typename InputIterator, typename Predicate>
-bool any_of(const thrust::detail::execution_policy_base<DerivedPolicy> &exec, InputIterator first, InputIterator last, Predicate pred)
-{
-  using thrust::system::detail::generic::any_of;
-  return any_of(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, pred);
-} // end any_of()
-
-
-template <typename DerivedPolicy, typename InputIterator, typename Predicate>
-bool none_of(const thrust::detail::execution_policy_base<DerivedPolicy> &exec, InputIterator first, InputIterator last, Predicate pred)
-{
-  using thrust::system::detail::generic::none_of;
-  return none_of(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, pred);
-} // end none_of()
-
-
-template <typename InputIterator, typename Predicate>
-bool all_of(InputIterator first, InputIterator last, Predicate pred)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator>::type System;
-
-  System system;
-
-  return thrust::all_of(select_system(system), first, last, pred);
-}
-
-
-template <typename InputIterator, typename Predicate>
-bool any_of(InputIterator first, InputIterator last, Predicate pred)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator>::type System;
-
-  System system;
-
-  return thrust::any_of(select_system(system), first, last, pred);
-}
-
-
-template <typename InputIterator, typename Predicate>
-bool none_of(InputIterator first, InputIterator last, Predicate pred)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator>::type System;
-
-  System system;
-
-  return thrust::none_of(select_system(system), first, last, pred);
-}
-
-
-} // end namespace thrust
-
diff --git a/compat/thrust/detail/malloc_and_free.h b/compat/thrust/detail/malloc_and_free.h
deleted file mode 100644
index 57b1685476..0000000000
--- a/compat/thrust/detail/malloc_and_free.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/execution_policy.h>
-#include <thrust/detail/pointer.h>
-#include <thrust/detail/raw_pointer_cast.h>
-#include <thrust/system/detail/generic/memory.h>
-#include <thrust/system/detail/adl/malloc_and_free.h>
-
-namespace thrust
-{
-
-template<typename DerivedPolicy>
-pointer<void,DerivedPolicy> malloc(const thrust::detail::execution_policy_base<DerivedPolicy> &exec, std::size_t n)
-{
-  using thrust::system::detail::generic::malloc;
-
-  // XXX should use a hypothetical thrust::static_pointer_cast here
-  void *raw_ptr = static_cast<void*>(thrust::raw_pointer_cast(malloc(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), n)));
-
-  return pointer<void,DerivedPolicy>(raw_ptr);
-}
-
-template<typename T, typename DerivedPolicy>
-pointer<T,DerivedPolicy> malloc(const thrust::detail::execution_policy_base<DerivedPolicy> &exec, std::size_t n)
-{
-  using thrust::system::detail::generic::malloc;
-
-  T *raw_ptr = static_cast<T*>(thrust::raw_pointer_cast(malloc<T>(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), n)));
-
-  return pointer<T,DerivedPolicy>(raw_ptr);
-}
-
-
-// XXX WAR nvbug 992955
-#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC
-#if CUDA_VERSION < 5000
-
-// cudafe generates unqualified calls to free(int *volatile)
-// which get confused with thrust::free
-// spoof a thrust::free which simply maps to ::free
-inline __host__ __device__
-void free(int *volatile ptr)
-{
-  ::free(ptr);
-}
-
-#endif // CUDA_VERSION
-#endif // THRUST_DEVICE_COMPILER
-
-template<typename DerivedPolicy, typename Pointer>
-void free(const thrust::detail::execution_policy_base<DerivedPolicy> &exec, Pointer ptr)
-{
-  using thrust::system::detail::generic::free;
-
-  free(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), ptr);
-}
-
-// XXX consider another form of free which does not take a system argument and
-// instead infers the system from the pointer
-
-} // end namespace thrust
-
diff --git a/compat/thrust/detail/merge.inl b/compat/thrust/detail/merge.inl
deleted file mode 100644
index 77f09f5bed..0000000000
--- a/compat/thrust/detail/merge.inl
+++ /dev/null
@@ -1,217 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-/*! \file merge.inl
- *  \brief Inline file for merge.h.
- */
-
-#include <thrust/merge.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/system/detail/generic/select_system.h>
-#include <thrust/system/detail/generic/merge.h>
-#include <thrust/system/detail/adl/merge.h>
-
-namespace thrust
-{
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator>
-  OutputIterator merge(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                       InputIterator1 first1,
-                       InputIterator1 last1,
-                       InputIterator2 first2,
-                       InputIterator2 last2,
-                       OutputIterator result)
-{
-  using thrust::system::detail::generic::merge;
-  return merge(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, last2, result);
-} // end merge()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename StrictWeakCompare>
-  OutputIterator merge(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                       InputIterator1 first1,
-                       InputIterator1 last1,
-                       InputIterator2 first2,
-                       InputIterator2 last2,
-                       OutputIterator result,
-                       StrictWeakCompare comp)
-{
-  using thrust::system::detail::generic::merge;
-  return merge(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, last2, result, comp);
-} // end merge()
-
-
-template<typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename InputIterator3, typename InputIterator4, typename OutputIterator1, typename OutputIterator2>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    merge_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                 InputIterator1 keys_first1, InputIterator1 keys_last1,
-                 InputIterator2 keys_first2, InputIterator2 keys_last2,
-                 InputIterator3 values_first1, InputIterator4 values_first2,
-                 OutputIterator1 keys_result,
-                 OutputIterator2 values_result)
-{
-  using thrust::system::detail::generic::merge_by_key;
-  return merge_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result);
-} // end merge_by_key()
-
-
-template<typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename InputIterator3, typename InputIterator4, typename OutputIterator1, typename OutputIterator2, typename Compare>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    merge_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                 InputIterator1 keys_first1, InputIterator1 keys_last1,
-                 InputIterator2 keys_first2, InputIterator2 keys_last2,
-                 InputIterator3 values_first1, InputIterator4 values_first2,
-                 OutputIterator1 keys_result,
-                 OutputIterator2 values_result,
-                 Compare comp)
-{
-  using thrust::system::detail::generic::merge_by_key;
-  return merge_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result, comp);
-} // end merge_by_key()
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename StrictWeakOrdering>
-  OutputIterator merge(InputIterator1 first1,
-                       InputIterator1 last1,
-                       InputIterator2 first2,
-                       InputIterator2 last2,
-                       OutputIterator result,
-                       StrictWeakOrdering comp)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type System1;
-  typedef typename thrust::iterator_system<InputIterator2>::type System2;
-  typedef typename thrust::iterator_system<OutputIterator>::type System3;
-
-  System1 system1;
-  System2 system2;
-  System3 system3;
-
-  return thrust::merge(select_system(system1,system2,system3), first1, last1, first2, last2, result, comp);
-} // end merge()
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator>
-  OutputIterator merge(InputIterator1 first1,
-                       InputIterator1 last1,
-                       InputIterator2 first2,
-                       InputIterator2 last2,
-                       OutputIterator result)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type System1;
-  typedef typename thrust::iterator_system<InputIterator2>::type System2;
-  typedef typename thrust::iterator_system<OutputIterator>::type System3;
-
-  System1 system1;
-  System2 system2;
-  System3 system3;
-
-  return thrust::merge(select_system(system1,system2,system3), first1, last1, first2, last2, result);
-} // end merge()
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename InputIterator4,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename StrictWeakOrdering>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    merge_by_key(InputIterator1 keys_first1,
-                 InputIterator1 keys_last1,
-                 InputIterator2 keys_first2,
-                 InputIterator2 keys_last2,
-                 InputIterator3 values_first1,
-                 InputIterator4 values_first2,
-                 OutputIterator1 keys_result,
-                 OutputIterator2 values_result,
-                 StrictWeakOrdering comp)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type  System1;
-  typedef typename thrust::iterator_system<InputIterator2>::type  System2;
-  typedef typename thrust::iterator_system<InputIterator3>::type  System3;
-  typedef typename thrust::iterator_system<InputIterator4>::type  System4;
-  typedef typename thrust::iterator_system<OutputIterator1>::type System5;
-  typedef typename thrust::iterator_system<OutputIterator2>::type System6;
-
-  System1 system1;
-  System2 system2;
-  System3 system3;
-  System4 system4;
-  System5 system5;
-  System6 system6;
-
-  return thrust::merge_by_key(select_system(system1,system2,system3,system4,system5,system6), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result, comp);
-} // end merge_by_key()
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename InputIterator4,
-         typename OutputIterator1,
-         typename OutputIterator2>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    merge_by_key(InputIterator1 keys_first1,
-                 InputIterator1 keys_last1,
-                 InputIterator2 keys_first2,
-                 InputIterator2 keys_last2,
-                 InputIterator3 values_first1,
-                 InputIterator4 values_first2,
-                 OutputIterator1 keys_result,
-                 OutputIterator2 values_result)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type  System1;
-  typedef typename thrust::iterator_system<InputIterator2>::type  System2;
-  typedef typename thrust::iterator_system<InputIterator3>::type  System3;
-  typedef typename thrust::iterator_system<InputIterator4>::type  System4;
-  typedef typename thrust::iterator_system<OutputIterator1>::type System5;
-  typedef typename thrust::iterator_system<OutputIterator2>::type System6;
-
-  System1 system1;
-  System2 system2;
-  System3 system3;
-  System4 system4;
-  System5 system5;
-  System6 system6;
-
-  return thrust::merge_by_key(select_system(system1,system2,system3,system4,system5,system6), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result);
-} // end merge_by_key()
-
-
-} // end thrust
-
diff --git a/compat/thrust/detail/minmax.h b/compat/thrust/detail/minmax.h
deleted file mode 100644
index a560ea1fe5..0000000000
--- a/compat/thrust/detail/minmax.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-namespace thrust
-{
-
-
-template<typename T, typename BinaryPredicate>
-__host__ __device__
-  T min THRUST_PREVENT_MACRO_SUBSTITUTION (const T &lhs, const T &rhs, BinaryPredicate comp)
-{
-  return comp(rhs, lhs) ? rhs : lhs;
-} // end min()
-
-template<typename T>
-__host__ __device__
-  T min THRUST_PREVENT_MACRO_SUBSTITUTION (const T &lhs, const T &rhs)
-{
-  return rhs < lhs ? rhs : lhs;
-} // end min()
-
-template<typename T, typename BinaryPredicate>
-__host__ __device__
-  T max THRUST_PREVENT_MACRO_SUBSTITUTION (const T &lhs, const T &rhs, BinaryPredicate comp)
-{
-  return comp(lhs,rhs) ? rhs : lhs;
-} // end max()
-
-template<typename T>
-__host__ __device__
-  T max THRUST_PREVENT_MACRO_SUBSTITUTION (const T &lhs, const T &rhs)
-{
-  return lhs < rhs ? rhs : lhs;
-} // end max()
-
-
-} // end thrust
-
diff --git a/compat/thrust/detail/mismatch.inl b/compat/thrust/detail/mismatch.inl
deleted file mode 100644
index 37ac663bbe..0000000000
--- a/compat/thrust/detail/mismatch.inl
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file mismatch.inl
- *  \brief Inline file for mismatch.h
- */
-
-
-#include <thrust/detail/config.h>
-#include <thrust/mismatch.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/system/detail/generic/select_system.h>
-#include <thrust/system/detail/generic/mismatch.h>
-#include <thrust/system/detail/adl/mismatch.h>
-
-namespace thrust
-{
-
-
-template<typename DerivedPolicy, typename InputIterator1, typename InputIterator2>
-thrust::pair<InputIterator1, InputIterator2> mismatch(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                                      InputIterator1 first1,
-                                                      InputIterator1 last1,
-                                                      InputIterator2 first2)
-{
-  using thrust::system::detail::generic::mismatch;
-  return mismatch(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2);
-} // end mismatch()
-
-
-template<typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename BinaryPredicate>
-thrust::pair<InputIterator1, InputIterator2> mismatch(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                                      InputIterator1 first1,
-                                                      InputIterator1 last1,
-                                                      InputIterator2 first2,
-                                                      BinaryPredicate pred)
-{
-  using thrust::system::detail::generic::mismatch;
-  return mismatch(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, pred);
-} // end mismatch()
-
-
-template <typename InputIterator1, typename InputIterator2>
-thrust::pair<InputIterator1, InputIterator2> mismatch(InputIterator1 first1,
-                                                      InputIterator1 last1,
-                                                      InputIterator2 first2)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type System1;
-  typedef typename thrust::iterator_system<InputIterator2>::type System2;
-
-  System1 system1;
-  System2 system2;
-
-  return thrust::mismatch(select_system(system1,system2), first1, last1, first2);
-} // end mismatch()
-
-
-template <typename InputIterator1, typename InputIterator2, typename BinaryPredicate>
-thrust::pair<InputIterator1, InputIterator2> mismatch(InputIterator1 first1,
-                                                      InputIterator1 last1,
-                                                      InputIterator2 first2,
-                                                      BinaryPredicate pred)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type System1;
-  typedef typename thrust::iterator_system<InputIterator2>::type System2;
-
-  System1 system1;
-  System2 system2;
-
-  return thrust::mismatch(select_system(system1,system2), first1, last1, first2, pred);
-} // end mismatch()
-
-
-} // end namespace thrust
-
diff --git a/compat/thrust/detail/mpl/math.h b/compat/thrust/detail/mpl/math.h
deleted file mode 100644
index 80adfc1e88..0000000000
--- a/compat/thrust/detail/mpl/math.h
+++ /dev/null
@@ -1,174 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file math.h
- *  \brief Math-related metaprogramming functionality.
- */
-
-
-#pragma once
-
-namespace thrust
-{
-
-namespace detail
-{
-
-namespace mpl
-{
-
-namespace math
-{
-
-namespace detail
-{
-
-// compute the log base-2 of an integer at compile time
-template <unsigned int N, unsigned int Cur>
-struct log2
-{
-    static const unsigned int value = log2<N / 2,Cur+1>::value;
-};
-
-template <unsigned int Cur>
-struct log2<1, Cur>
-{
-    static const unsigned int value = Cur;
-};
-
-template <unsigned int Cur>
-struct log2<0, Cur>
-{
-    // undefined
-};
-
-} // end namespace detail
-
-
-template <unsigned int N>
-struct log2
-{
-    static const unsigned int value = detail::log2<N,0>::value;
-};
-
-
-template <typename T, T lhs, T rhs>
-struct min
-{
-  static const T value = (lhs < rhs) ? lhs : rhs;
-};
-
-
-template <typename T, T lhs, T rhs>
-struct max
-{
-  static const T value = (!(lhs < rhs)) ? lhs : rhs;
-};
-
-
-template<typename result_type, result_type x, result_type y>
-  struct mul
-{
-  static const result_type value = x * y;
-};
-
-
-template<typename result_type, result_type x, result_type y>
-  struct mod
-{
-  static const result_type value = x % y;
-};
-
-
-template<typename result_type, result_type x, result_type y>
-  struct div
-{
-  static const result_type value = x / y;
-};
-
-
-template<typename result_type, result_type x, result_type y>
-  struct geq
-{
-  static const bool value = x >= y;
-};
-
-
-template<typename result_type, result_type x, result_type y>
-  struct lt
-{
-  static const bool value = x < y;
-};
-
-
-template<typename result_type, result_type x, result_type y>
-  struct gt
-{
-  static const bool value = x > y;
-};
-
-
-template<bool x, bool y>
-  struct or_
-{
-  static const bool value = (x || y);
-};
-
-
-template<typename result_type, result_type x, result_type y>
-  struct bit_and
-{
-  static const result_type value = x & y;
-};
-
-
-template<typename result_type, result_type x, result_type y>
-  struct plus
-{
-  static const result_type value = x + y;
-};
-
-
-template<typename result_type, result_type x, result_type y>
-  struct minus
-{
-  static const result_type value = x - y;
-};
-
-
-template<typename result_type, result_type x, result_type y>
-  struct equal
-{
-  static const bool value = x == y;
-};
-
-
-template<typename result_type, result_type x>
-  struct is_odd
-{
-  static const bool value = x & 1;
-};
-
-
-} // end namespace math
-
-} // end namespace mpl
-
-} // end namespace detail
-
-} // end namespace thrust
-
diff --git a/compat/thrust/detail/numeric_traits.h b/compat/thrust/detail/numeric_traits.h
deleted file mode 100644
index a3bc56c211..0000000000
--- a/compat/thrust/detail/numeric_traits.h
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/type_traits.h>
-#include <limits>
-
-//#include <stdint.h> // for intmax_t (not provided on MSVS 2005)
-
-namespace thrust
-{
-
-namespace detail
-{
-
-// XXX good enough for the platforms we care about
-typedef long long intmax_t;
-
-template<typename Number>
-  struct is_signed
-    : integral_constant<bool, std::numeric_limits<Number>::is_signed>
-{}; // end is_signed
-
-
-template<typename T>
-  struct num_digits
-    : eval_if<
-        std::numeric_limits<T>::is_specialized,
-        integral_constant<
-          int,
-          std::numeric_limits<T>::digits
-        >,
-        integral_constant<
-          int,
-          sizeof(T) * std::numeric_limits<unsigned char>::digits - (is_signed<T>::value ? 1 : 0)  
-        >
-      >::type
-{}; // end num_digits
-
-
-template<typename Integer>
-  struct integer_difference
-    //: eval_if<
-    //    sizeof(Integer) >= sizeof(intmax_t),
-    //    eval_if<
-    //      is_signed<Integer>::value,
-    //      identity_<Integer>,
-    //      identity_<intmax_t>
-    //    >,
-    //    eval_if<
-    //      sizeof(Integer) < sizeof(std::ptrdiff_t),
-    //      identity_<std::ptrdiff_t>,
-    //      identity_<intmax_t>
-    //    >
-    //  >
-{
-  private:
-    // XXX workaround a pedantic warning in old versions of g++
-    //     which complains about &&ing with a constant value
-    template<bool x, bool y>
-      struct and_
-    {
-      static const bool value = false;
-    };
-
-    template<bool y>
-      struct and_<true,y>
-    {
-      static const bool value = y;
-    };
-
-  public:
-    typedef typename
-      eval_if<
-        and_<
-          std::numeric_limits<Integer>::is_signed,
-          // digits is the number of no-sign bits
-          (!std::numeric_limits<Integer>::is_bounded || (int(std::numeric_limits<Integer>::digits) + 1 >= num_digits<intmax_t>::value))
-        >::value,
-        identity_<Integer>,
-        eval_if<
-          int(std::numeric_limits<Integer>::digits) + 1 < num_digits<signed int>::value,
-          identity_<signed int>,
-          eval_if<
-            int(std::numeric_limits<Integer>::digits) + 1 < num_digits<signed long>::value,
-            identity_<signed long>,
-            identity_<intmax_t>
-          >
-        >
-      >::type type;
-}; // end integer_difference
-
-
-template<typename Number>
-  struct numeric_difference
-    : eval_if<
-      is_integral<Number>::value,
-      integer_difference<Number>,
-      identity_<Number>
-    >
-{}; // end numeric_difference
-
-
-template<typename Number>
-__host__ __device__
-typename numeric_difference<Number>::type
-numeric_distance(Number x, Number y)
-{
-  typedef typename numeric_difference<Number>::type difference_type;
-  return difference_type(y) - difference_type(x);
-} // end numeric_distance
-
-} // end detail
-
-} // end thrust
-
diff --git a/compat/thrust/detail/overlapped_copy.h b/compat/thrust/detail/overlapped_copy.h
deleted file mode 100644
index a5540b8643..0000000000
--- a/compat/thrust/detail/overlapped_copy.h
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/iterator/detail/minimum_system.h>
-#include <thrust/detail/copy.h>
-#include <thrust/detail/temporary_array.h>
-#include <thrust/system/cpp/detail/execution_policy.h>
-
-namespace thrust
-{
-namespace detail
-{
-
-
-template<typename InputIterator,
-         typename OutputIterator>
-  OutputIterator sequential_copy(InputIterator first,
-                                 InputIterator last,
-                                 OutputIterator result)
-{
-  for(; first != last; ++first, ++result)
-  {
-    *result = *first;
-  } // end for
-
-  return result;
-} // end sequential_copy()
-
-
-template<typename BidirectionalIterator1,
-         typename BidirectionalIterator2>
-  BidirectionalIterator2 sequential_copy_backward(BidirectionalIterator1 first,
-                                                  BidirectionalIterator1 last,
-                                                  BidirectionalIterator2 result)
-{
-  // yes, we preincrement
-  // the ranges are open on the right, i.e. [first, last)
-  while(first != last)
-  {
-    *--result = *--last;
-  } // end while
-
-  return result;
-} // end sequential_copy_backward()
-
-
-namespace dispatch
-{
-
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2>
-  RandomAccessIterator2 overlapped_copy(thrust::system::cpp::detail::execution_policy<DerivedPolicy> &,
-                                        RandomAccessIterator1 first,
-                                        RandomAccessIterator1 last,
-                                        RandomAccessIterator2 result)
-{
-  if(first < last && first <= result && result < last)
-  {
-    // result lies in [first, last)
-    // it's safe to use std::copy_backward here
-    thrust::detail::sequential_copy_backward(first, last, result + (last - first));
-    result += (last - first);
-  } // end if
-  else
-  {
-    // result + (last - first) lies in [first, last)
-    // it's safe to use sequential_copy here
-    result = thrust::detail::sequential_copy(first, last, result);
-  } // end else
-
-  return result;
-} // end overlapped_copy()
-
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2>
-  RandomAccessIterator2 overlapped_copy(thrust::execution_policy<DerivedPolicy> &exec,
-                                        RandomAccessIterator1 first,
-                                        RandomAccessIterator1 last,
-                                        RandomAccessIterator2 result)
-{
-  typedef typename thrust::iterator_value<RandomAccessIterator1>::type value_type;
-
-  // make a temporary copy of [first,last), and copy into it first
-  thrust::detail::temporary_array<value_type, DerivedPolicy> temp(exec, first, last);
-  return thrust::copy(exec, temp.begin(), temp.end(), result);
-} // end overlapped_copy()
-
-} // end dispatch
-
-
-template<typename RandomAccessIterator1,
-         typename RandomAccessIterator2>
-  RandomAccessIterator2 overlapped_copy(RandomAccessIterator1 first,
-                                        RandomAccessIterator1 last,
-                                        RandomAccessIterator2 result)
-{
-  typedef typename thrust::iterator_system<RandomAccessIterator2>::type System1;
-  typedef typename thrust::iterator_system<RandomAccessIterator2>::type System2;
-
-  typedef typename thrust::detail::minimum_system<System1, System2>::type System;
-
-  // XXX presumes System is default constructible
-  System system;
-
-  return thrust::detail::dispatch::overlapped_copy(system, first, last, result);
-} // end overlapped_copy()
-
-} // end detail
-} // end thrust
-
diff --git a/compat/thrust/detail/pair.inl b/compat/thrust/detail/pair.inl
deleted file mode 100644
index 776bdc2315..0000000000
--- a/compat/thrust/detail/pair.inl
+++ /dev/null
@@ -1,225 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/pair.h>
-#include <thrust/detail/swap.h>
-
-namespace thrust
-{
-
-template <typename T1, typename T2>
-  pair<T1,T2>
-    ::pair(void)
-      :first(),second()
-{
-  ;
-} // end pair::pair()
-
-
-template <typename T1, typename T2>
-  pair<T1,T2>
-    ::pair(const T1 &x, const T2 &y)
-      :first(x),second(y)
-{
-  ;
-} // end pair::pair()
-
-
-template <typename T1, typename T2>
-  template <typename U1, typename U2>
-    pair<T1,T2>
-      ::pair(const pair<U1,U2> &p)
-        :first(p.first),second(p.second)
-{
-  ;
-} // end pair::pair()
-
-
-template <typename T1, typename T2>
-  template <typename U1, typename U2>
-    pair<T1,T2>
-      ::pair(const std::pair<U1,U2> &p)
-        :first(p.first),second(p.second)
-{
-  ;
-} // end pair::pair()
-
-
-template<typename T1, typename T2>
-  inline __host__ __device__
-    void pair<T1,T2>
-      ::swap(thrust::pair<T1,T2> &p)
-{
-  using thrust::swap;
-
-  swap(first, p.first);
-  swap(second, p.second);
-} // end pair::swap()
-
-
-template <typename T1, typename T2>
-  inline __host__ __device__
-    bool operator==(const pair<T1,T2> &x, const pair<T1,T2> &y)
-{
-  return x.first == y.first && x.second == y.second;
-} // end operator==()
-
-
-template <typename T1, typename T2>
-  inline __host__ __device__
-    bool operator<(const pair<T1,T2> &x, const pair<T1,T2> &y)
-{
-  return x.first < y.first || (!(y.first < x.first) && x.second < y.second);
-} // end operator<()
-
-
-template <typename T1, typename T2>
-  inline __host__ __device__
-    bool operator!=(const pair<T1,T2> &x, const pair<T1,T2> &y)
-{
-  return !(x == y);
-} // end operator==()
-
-
-template <typename T1, typename T2>
-  inline __host__ __device__
-    bool operator>(const pair<T1,T2> &x, const pair<T1,T2> &y)
-{
-  return y < x;
-} // end operator<()
-
-
-template <typename T1, typename T2>
-  inline __host__ __device__
-    bool operator<=(const pair<T1,T2> &x, const pair<T1,T2> &y)
-{
-  return !(y < x);
-} // end operator<=()
-
-
-template <typename T1, typename T2>
-  inline __host__ __device__
-    bool operator>=(const pair<T1,T2> &x, const pair<T1,T2> &y)
-{
-  return !(x < y);
-} // end operator>=()
-
-
-template <typename T1, typename T2>
-  inline __host__ __device__
-    void swap(pair<T1,T2> &x, pair<T1,T2> &y)
-{
-  return x.swap(y);
-} // end swap()
-
-
-template <typename T1, typename T2>
-  inline __host__ __device__
-    pair<T1,T2> make_pair(T1 x, T2 y)
-{
-  return pair<T1,T2>(x,y);
-} // end make_pair()
-
-
-// specializations of tuple_element for pair
-template<typename T1, typename T2>
-  struct tuple_element<0, pair<T1,T2> >
-{
-  typedef T1 type;
-}; // end tuple_element
-
-template<typename T1, typename T2>
-  struct tuple_element<1, pair<T1,T2> >
-{
-  typedef T2 type;
-}; // end tuple_element
-
-
-// specialization of tuple_size for pair
-template<typename T1, typename T2>
-  struct tuple_size< pair<T1,T2 > >
-{
-  static const unsigned int value = 2;
-}; // end tuple_size
-
-
-
-namespace detail
-{
-
-
-template<int N, typename Pair> struct pair_get {};
-
-template<typename Pair>
-  struct pair_get<0, Pair>
-{
-  inline __host__ __device__
-    const typename tuple_element<0, Pair>::type &
-      operator()(const Pair &p) const
-  {
-    return p.first;
-  } // end operator()()
-
-  inline __host__ __device__
-    typename tuple_element<0, Pair>::type &
-      operator()(Pair &p) const
-  {
-    return p.first;
-  } // end operator()()
-}; // end pair_get
-
-
-template<typename Pair>
-  struct pair_get<1, Pair>
-{
-  inline __host__ __device__
-    const typename tuple_element<1, Pair>::type &
-      operator()(const Pair &p) const
-  {
-    return p.second;
-  } // end operator()()
-
-  inline __host__ __device__
-    typename tuple_element<1, Pair>::type &
-      operator()(Pair &p) const
-  {
-    return p.second;
-  } // end operator()()
-}; // end pair_get
-
-} // end detail
-
-
-
-template<unsigned int N, typename T1, typename T2>
-  inline __host__ __device__
-    typename tuple_element<N, pair<T1,T2> >::type &
-      get(pair<T1,T2> &p)
-{
-  return detail::pair_get<N, pair<T1,T2> >()(p);
-} // end get()
-
-template<unsigned int N, typename T1, typename T2>
-  inline __host__ __device__
-    const typename tuple_element<N, pair<T1,T2> >::type &
-      get(const pair<T1,T2> &p)
-{
-  return detail::pair_get<N, pair<T1,T2> >()(p);
-} // end get()
-
-
-} // end thrust
-
diff --git a/compat/thrust/detail/partition.inl b/compat/thrust/detail/partition.inl
deleted file mode 100644
index 19ef08a73c..0000000000
--- a/compat/thrust/detail/partition.inl
+++ /dev/null
@@ -1,398 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file partition.inl
- *  \brief Inline file for partition.h.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/partition.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/system/detail/generic/select_system.h>
-#include <thrust/system/detail/generic/partition.h>
-#include <thrust/system/detail/adl/partition.h>
-
-namespace thrust
-{
-
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename Predicate>
-  ForwardIterator partition(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                            ForwardIterator first,
-                            ForwardIterator last,
-                            Predicate pred)
-{
-  using thrust::system::detail::generic::partition;
-  return partition(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, pred);
-} // end partition()
-
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename InputIterator,
-         typename Predicate>
-  ForwardIterator partition(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                            ForwardIterator first,
-                            ForwardIterator last,
-                            InputIterator stencil,
-                            Predicate pred)
-{
-  using thrust::system::detail::generic::partition;
-  return partition(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, stencil, pred);
-} // end partition()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename Predicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    partition_copy(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                   InputIterator first,
-                   InputIterator last,
-                   OutputIterator1 out_true,
-                   OutputIterator2 out_false,
-                   Predicate pred)
-{
-  using thrust::system::detail::generic::partition_copy;
-  return partition_copy(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, out_true, out_false, pred);
-} // end partition_copy()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename Predicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    partition_copy(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                   InputIterator1 first,
-                   InputIterator1 last,
-                   InputIterator2 stencil,
-                   OutputIterator1 out_true,
-                   OutputIterator2 out_false,
-                   Predicate pred)
-{
-  using thrust::system::detail::generic::partition_copy;
-  return partition_copy(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, stencil, out_true, out_false, pred);
-} // end partition_copy()
-
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename Predicate>
-  ForwardIterator stable_partition(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                   ForwardIterator first,
-                                   ForwardIterator last,
-                                   Predicate pred)
-{
-  using thrust::system::detail::generic::stable_partition;
-  return stable_partition(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, pred);
-} // end stable_partition()
-
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename InputIterator,
-         typename Predicate>
-  ForwardIterator stable_partition(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                   ForwardIterator first,
-                                   ForwardIterator last,
-                                   InputIterator stencil,
-                                   Predicate pred)
-{
-  using thrust::system::detail::generic::stable_partition;
-  return stable_partition(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, stencil, pred);
-} // end stable_partition()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename Predicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    stable_partition_copy(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                          InputIterator first,
-                          InputIterator last,
-                          OutputIterator1 out_true,
-                          OutputIterator2 out_false,
-                          Predicate pred)
-{
-  using thrust::system::detail::generic::stable_partition_copy;
-  return stable_partition_copy(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, out_true, out_false, pred);
-} // end stable_partition_copy()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename Predicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    stable_partition_copy(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                          InputIterator1 first,
-                          InputIterator1 last,
-                          InputIterator2 stencil,
-                          OutputIterator1 out_true,
-                          OutputIterator2 out_false,
-                          Predicate pred)
-{
-  using thrust::system::detail::generic::stable_partition_copy;
-  return stable_partition_copy(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, stencil, out_true, out_false, pred);
-} // end stable_partition_copy()
-
-
-template<typename DerivedPolicy, typename ForwardIterator, typename Predicate>
-  ForwardIterator partition_point(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                  ForwardIterator first,
-                                  ForwardIterator last,
-                                  Predicate pred)
-{
-  using thrust::system::detail::generic::partition_point;
-  return partition_point(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, pred);
-} // end partition_point()
-
-
-template<typename DerivedPolicy, typename InputIterator, typename Predicate>
-  bool is_partitioned(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                      InputIterator first,
-                      InputIterator last,
-                      Predicate pred)
-{
-  using thrust::system::detail::generic::is_partitioned;
-  return is_partitioned(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, pred);
-} // end is_partitioned()
-
-
-template<typename ForwardIterator,
-         typename Predicate>
-  ForwardIterator partition(ForwardIterator first,
-                            ForwardIterator last,
-                            Predicate pred)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<ForwardIterator>::type System;
-
-  System system;
-
-  return thrust::partition(select_system(system), first, last, pred);
-} // end partition()
-
-
-template<typename ForwardIterator,
-         typename InputIterator,
-         typename Predicate>
-  ForwardIterator partition(ForwardIterator first,
-                            ForwardIterator last,
-                            InputIterator stencil,
-                            Predicate pred)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<ForwardIterator>::type System1;
-  typedef typename thrust::iterator_system<InputIterator>::type System2;
-
-  System1 system1;
-  System2 system2;
-
-  return thrust::partition(select_system(system1,system2), first, last, stencil, pred);
-} // end partition()
-
-
-template<typename ForwardIterator,
-         typename Predicate>
-  ForwardIterator stable_partition(ForwardIterator first,
-                                   ForwardIterator last,
-                                   Predicate pred)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<ForwardIterator>::type System;
-
-  System system;
-
-  return thrust::stable_partition(select_system(system), first, last, pred);
-} // end stable_partition()
-
-
-template<typename ForwardIterator,
-         typename InputIterator,
-         typename Predicate>
-  ForwardIterator stable_partition(ForwardIterator first,
-                                   ForwardIterator last,
-                                   InputIterator stencil,
-                                   Predicate pred)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<ForwardIterator>::type System1;
-  typedef typename thrust::iterator_system<InputIterator>::type   System2;
-
-  System1 system1;
-  System2 system2;
-
-  return thrust::stable_partition(select_system(system1,system2), first, last, stencil, pred);
-} // end stable_partition()
-
-
-template<typename InputIterator,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename Predicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    partition_copy(InputIterator first,
-                   InputIterator last,
-                   OutputIterator1 out_true,
-                   OutputIterator2 out_false,
-                   Predicate pred)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator>::type   System1;
-  typedef typename thrust::iterator_system<OutputIterator1>::type System2;
-  typedef typename thrust::iterator_system<OutputIterator2>::type System3;
-
-  System1 system1;
-  System2 system2;
-  System3 system3;
-
-  return thrust::partition_copy(select_system(system1,system2,system3), first, last, out_true, out_false, pred);
-} // end partition_copy()
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename Predicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    partition_copy(InputIterator1 first,
-                   InputIterator1 last,
-                   InputIterator2 stencil,
-                   OutputIterator1 out_true,
-                   OutputIterator2 out_false,
-                   Predicate pred)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type  System1;
-  typedef typename thrust::iterator_system<InputIterator1>::type  System2;
-  typedef typename thrust::iterator_system<OutputIterator1>::type System3;
-  typedef typename thrust::iterator_system<OutputIterator2>::type System4;
-
-  System1 system1;
-  System2 system2;
-  System3 system3;
-  System4 system4;
-
-  return thrust::partition_copy(select_system(system1,system2,system3,system4), first, last, stencil, out_true, out_false, pred);
-} // end partition_copy()
-
-
-template<typename InputIterator,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename Predicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    stable_partition_copy(InputIterator first,
-                          InputIterator last,
-                          OutputIterator1 out_true,
-                          OutputIterator2 out_false,
-                          Predicate pred)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator>::type   System1;
-  typedef typename thrust::iterator_system<OutputIterator1>::type System2;
-  typedef typename thrust::iterator_system<OutputIterator2>::type System3;
-
-  System1 system1;
-  System2 system2;
-  System3 system3;
-
-  return thrust::stable_partition_copy(select_system(system1,system2,system3), first, last, out_true, out_false, pred);
-} // end stable_partition_copy()
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename Predicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    stable_partition_copy(InputIterator1 first,
-                          InputIterator1 last,
-                          InputIterator2 stencil,
-                          OutputIterator1 out_true,
-                          OutputIterator2 out_false,
-                          Predicate pred)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type   System1;
-  typedef typename thrust::iterator_system<InputIterator2>::type   System2;
-  typedef typename thrust::iterator_system<OutputIterator1>::type  System3;
-  typedef typename thrust::iterator_system<OutputIterator2>::type  System4;
-
-  System1 system1;
-  System2 system2;
-  System3 system3;
-  System4 system4;
-
-  return thrust::stable_partition_copy(select_system(system1,system2,system3,system4), first, last, stencil, out_true, out_false, pred);
-} // end stable_partition_copy()
-
-
-template<typename ForwardIterator, typename Predicate>
-  ForwardIterator partition_point(ForwardIterator first,
-                                  ForwardIterator last,
-                                  Predicate pred)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<ForwardIterator>::type System;
-
-  System system;
-
-  return thrust::partition_point(select_system(system), first, last, pred);
-} // end partition_point()
-
-
-template<typename InputIterator, typename Predicate>
-  bool is_partitioned(InputIterator first,
-                      InputIterator last,
-                      Predicate pred)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator>::type System;
-
-  System system;
-
-  return thrust::is_partitioned(select_system(system), first, last, pred);
-} // end is_partitioned()
-
-
-} // end thrust
-
diff --git a/compat/thrust/detail/pointer.h b/compat/thrust/detail/pointer.h
deleted file mode 100644
index bc97939c77..0000000000
--- a/compat/thrust/detail/pointer.h
+++ /dev/null
@@ -1,184 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/iterator/iterator_adaptor.h>
-#include <thrust/iterator/detail/iterator_traversal_tags.h>
-#include <thrust/detail/type_traits/pointer_traits.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/detail/reference_forward_declaration.h>
-
-namespace thrust
-{
-
-// declare pointer with default values of template parameters
-template<typename Element, typename Tag, typename Reference = use_default, typename Derived = use_default> class pointer;
-
-} // end thrust
-
-
-// specialize std::iterator_traits to avoid problems with the name of
-// pointer's constructor shadowing its nested pointer type
-// do this before pointer is defined so the specialization is correctly
-// used inside the definition
-namespace std
-{
-
-template<typename Element, typename Tag, typename Reference, typename Derived>
-  struct iterator_traits<thrust::pointer<Element,Tag,Reference,Derived> >
-{
-  private:
-    typedef thrust::pointer<Element,Tag,Reference,Derived> ptr;
-
-  public:
-    typedef typename ptr::iterator_category iterator_category;
-    typedef typename ptr::value_type        value_type;
-    typedef typename ptr::difference_type   difference_type;
-    // XXX implement this type (the result of operator->) later
-    typedef void                             pointer;
-    typedef typename ptr::reference         reference;
-}; // end iterator_traits
-
-} // end std
-
-
-namespace thrust
-{
-
-namespace detail
-{
-
-// this metafunction computes the type of iterator_adaptor thrust::pointer should inherit from
-template<typename Element, typename Tag, typename Reference, typename Derived>
-  struct pointer_base
-{
-  // void pointers should have no element type
-  // note that we remove_cv from the Element type to get the value_type
-  typedef typename thrust::detail::eval_if<
-    thrust::detail::is_void<typename thrust::detail::remove_const<Element>::type>::value,
-    thrust::detail::identity_<void>,
-    thrust::detail::remove_cv<Element>
-  >::type value_type;
-
-  // if no Derived type is given, just use pointer
-  typedef typename thrust::detail::eval_if<
-    thrust::detail::is_same<Derived,use_default>::value,
-    thrust::detail::identity_<pointer<Element,Tag,Reference,Derived> >,
-    thrust::detail::identity_<Derived>
-  >::type derived_type;
-
-  // void pointers should have no reference type
-  // if no Reference type is given, just use reference
-  typedef typename thrust::detail::eval_if<
-    thrust::detail::is_void<typename thrust::detail::remove_const<Element>::type>::value,
-    thrust::detail::identity_<void>,
-    thrust::detail::eval_if<
-      thrust::detail::is_same<Reference,use_default>::value,
-      thrust::detail::identity_<reference<Element,derived_type> >,
-      thrust::detail::identity_<Reference>
-    >
-  >::type reference_arg;
-
-  typedef thrust::iterator_adaptor<
-    derived_type,                        // pass along the type of our Derived class to iterator_adaptor
-    Element *,                           // we adapt a raw pointer
-    value_type,                          // the value type
-    Tag,                                 // system tag
-    thrust::random_access_traversal_tag, // pointers have random access traversal
-    reference_arg,                       // pass along our Reference type
-    std::ptrdiff_t
-  > type;
-}; // end pointer_base
-
-
-} // end detail
-
-
-// the base type for all of thrust's tagged pointers.
-// for reasonable pointer-like semantics, derived types should reimplement the following:
-// 1. no-argument constructor
-// 2. constructor from OtherElement *
-// 3. constructor from OtherPointer related by convertibility
-// 4. assignment from OtherPointer related by convertibility
-// These should just call the corresponding members of pointer.
-template<typename Element, typename Tag, typename Reference, typename Derived>
-  class pointer
-    : public thrust::detail::pointer_base<Element,Tag,Reference,Derived>::type
-{
-  private:
-    typedef typename thrust::detail::pointer_base<Element,Tag,Reference,Derived>::type         super_t;
-
-    typedef typename thrust::detail::pointer_base<Element,Tag,Reference,Derived>::derived_type derived_type;
-
-    // friend iterator_core_access to give it access to dereference
-    friend class thrust::iterator_core_access;
-
-    __host__ __device__
-    typename super_t::reference dereference() const;
-
-    // don't provide access to this part of super_t's interface
-    using super_t::base;
-    using typename super_t::base_type;
-
-  public:
-    typedef typename super_t::base_type raw_pointer;
-
-    // constructors
-    
-    __host__ __device__
-    pointer();
-
-    // OtherValue shall be convertible to Value
-    // XXX consider making the pointer implementation a template parameter which defaults to Element *
-    template<typename OtherElement>
-    __host__ __device__
-    explicit pointer(OtherElement *ptr);
-
-    // OtherPointer's element_type shall be convertible to Element
-    // OtherPointer's system shall be convertible to Tag
-    template<typename OtherPointer>
-    __host__ __device__
-    pointer(const OtherPointer &other,
-            typename thrust::detail::enable_if_pointer_is_convertible<
-              OtherPointer,
-              pointer<Element,Tag,Reference,Derived>
-            >::type * = 0);
-
-    // assignment
-    
-    // OtherPointer's element_type shall be convertible to Element
-    // OtherPointer's system shall be convertible to Tag
-    template<typename OtherPointer>
-    __host__ __device__
-    typename thrust::detail::enable_if_pointer_is_convertible<
-      OtherPointer,
-      pointer,
-      derived_type &
-    >::type
-    operator=(const OtherPointer &other);
-
-    // observers
-
-    __host__ __device__
-    Element *get() const;
-}; // end pointer
-
-} // end thrust
-
-#include <thrust/detail/pointer.inl>
-
diff --git a/compat/thrust/detail/pointer.inl b/compat/thrust/detail/pointer.inl
deleted file mode 100644
index 1d066b041c..0000000000
--- a/compat/thrust/detail/pointer.inl
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/pointer.h>
-
-
-namespace thrust
-{
-
-
-template<typename Element, typename Tag, typename Reference, typename Derived>
-  pointer<Element,Tag,Reference,Derived>
-    ::pointer()
-      : super_t(static_cast<Element*>(0))
-{} // end pointer::pointer
-
-
-template<typename Element, typename Tag, typename Reference, typename Derived>
-  template<typename OtherElement>
-    pointer<Element,Tag,Reference,Derived>
-      ::pointer(OtherElement *other)
-        : super_t(other)
-{} // end pointer::pointer
-
-
-template<typename Element, typename Tag, typename Reference, typename Derived>
-  template<typename OtherPointer>
-    pointer<Element,Tag,Reference,Derived>
-      ::pointer(const OtherPointer &other,
-                typename thrust::detail::enable_if_pointer_is_convertible<
-                  OtherPointer,
-                  pointer<Element,Tag,Reference,Derived>
-                 >::type *)
-        : super_t(thrust::detail::pointer_traits<OtherPointer>::get(other))
-{} // end pointer::pointer
-
-
-template<typename Element, typename Tag, typename Reference, typename Derived>
-  template<typename OtherPointer>
-    typename thrust::detail::enable_if_pointer_is_convertible<
-      OtherPointer,
-      pointer<Element,Tag,Reference,Derived>,
-      typename pointer<Element,Tag,Reference,Derived>::derived_type &
-    >::type
-      pointer<Element,Tag,Reference,Derived>
-        ::operator=(const OtherPointer &other)
-{
-  super_t::base_reference() = thrust::detail::pointer_traits<OtherPointer>::get(other);
-  return static_cast<derived_type&>(*this);
-} // end pointer::operator=
-
-
-template<typename Element, typename Tag, typename Reference, typename Derived>
-  typename pointer<Element,Tag,Reference,Derived>::super_t::reference
-    pointer<Element,Tag,Reference,Derived>
-      ::dereference() const
-{
-  return typename super_t::reference(static_cast<const derived_type&>(*this));
-} // end pointer::dereference
-
-
-template<typename Element, typename Tag, typename Reference, typename Derived>
-  Element *pointer<Element,Tag,Reference,Derived>
-    ::get() const
-{
-  return super_t::base();
-} // end pointer::get
-
-
-namespace detail
-{
-
-#if (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC) && (_MSC_VER <= 1400)
-// XXX WAR MSVC 2005 problem with correctly implementing
-//     pointer_raw_pointer for pointer by specializing it here
-template<typename Element, typename Tag, typename Reference, typename Derived>
-  struct pointer_raw_pointer< thrust::pointer<Element,Tag,Reference,Derived> >
-{
-  typedef typename pointer<Element,Tag,Reference,Derived>::raw_pointer type;
-}; // end pointer_raw_pointer
-#endif
-
-
-#if (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_GCC) && (THRUST_GCC_VERSION < 40200)
-// XXX WAR g++-4.1 problem with correctly implementing
-//     pointer_element for pointer by specializing it here
-template<typename Element, typename Tag>
-  struct pointer_element< thrust::pointer<Element,Tag> >
-{
-  typedef Element type;
-}; // end pointer_element
-
-template<typename Element, typename Tag, typename Reference>
-  struct pointer_element< thrust::pointer<Element,Tag,Reference> >
-    : pointer_element< thrust::pointer<Element,Tag> >
-{}; // end pointer_element
-
-template<typename Element, typename Tag, typename Reference, typename Derived>
-  struct pointer_element< thrust::pointer<Element,Tag,Reference,Derived> >
-    : pointer_element< thrust::pointer<Element,Tag,Reference> >
-{}; // end pointer_element
-
-
-
-// XXX WAR g++-4.1 problem with correctly implementing
-//     rebind_pointer for pointer by specializing it here
-template<typename Element, typename Tag, typename NewElement>
-  struct rebind_pointer<thrust::pointer<Element,Tag>, NewElement>
-{
-  // XXX note we don't attempt to rebind the pointer's Reference type (or Derived)
-  typedef thrust::pointer<NewElement,Tag> type;
-};
-
-template<typename Element, typename Tag, typename Reference, typename NewElement>
-  struct rebind_pointer<thrust::pointer<Element,Tag,Reference>, NewElement>
-    : rebind_pointer<thrust::pointer<Element,Tag>, NewElement>
-{};
-
-template<typename Element, typename Tag, typename Reference, typename Derived, typename NewElement>
-  struct rebind_pointer<thrust::pointer<Element,Tag,Reference,Derived>, NewElement>
-    : rebind_pointer<thrust::pointer<Element,Tag,Reference>, NewElement>
-{};
-#endif
-
-} // end namespace detail
-
-
-} // end thrust
-
diff --git a/compat/thrust/detail/range/tail_flags.h b/compat/thrust/detail/range/tail_flags.h
deleted file mode 100644
index 06fd9f8a00..0000000000
--- a/compat/thrust/detail/range/tail_flags.h
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/iterator/transform_iterator.h>
-#include <thrust/iterator/zip_iterator.h>
-#include <thrust/iterator/counting_iterator.h>
-#include <thrust/tuple.h>
-#include <thrust/functional.h>
-
-namespace thrust
-{
-namespace detail
-{
-
-
-template<typename RandomAccessIterator,
-         typename BinaryPredicate = thrust::equal_to<typename thrust::iterator_value<RandomAccessIterator>::type>,
-         typename IndexType = typename thrust::iterator_difference<RandomAccessIterator>::type>
-  class tail_flags
-{
-  private:
-    struct tail_flag_functor
-    {
-      BinaryPredicate binary_pred; // this must be the first member for performance reasons
-      IndexType n;
-
-      typedef bool result_type;
-
-      tail_flag_functor(IndexType n)
-        : binary_pred(), n(n)
-      {}
-
-      tail_flag_functor(IndexType n, BinaryPredicate binary_pred)
-        : binary_pred(binary_pred), n(n)
-      {}
-
-      template<typename Tuple>
-      __host__ __device__ __thrust_forceinline__
-      result_type operator()(const Tuple &t)
-      {
-        const IndexType i = thrust::get<0>(t);
-
-        // note that we do not dereference the tuple's 2nd element when i >= n
-        // and therefore do not dereference a bad location at the boundary
-        return (i == (n - 1) || !binary_pred(thrust::get<1>(t), thrust::get<2>(t)));
-      }
-    };
-
-    typedef thrust::counting_iterator<IndexType> counting_iterator;
-
-  public:
-    typedef thrust::transform_iterator<
-      tail_flag_functor,
-      thrust::zip_iterator<thrust::tuple<counting_iterator,RandomAccessIterator,RandomAccessIterator> >
-    > iterator;
-
-    tail_flags(RandomAccessIterator first, RandomAccessIterator last)
-      : m_begin(thrust::make_transform_iterator(thrust::make_zip_iterator(thrust::make_tuple(thrust::counting_iterator<IndexType>(0), first, first + 1)),
-                                                tail_flag_functor(last - first))),
-        m_end(m_begin + (last - first))
-    {}
-
-    tail_flags(RandomAccessIterator first, RandomAccessIterator last, BinaryPredicate binary_pred)
-      : m_begin(thrust::make_transform_iterator(thrust::make_zip_iterator(thrust::make_tuple(thrust::counting_iterator<IndexType>(0), first, first + 1)),
-                                                tail_flag_functor(last - first, binary_pred))),
-        m_end(m_begin + (last - first))
-    {}
-
-    iterator begin() const
-    {
-      return m_begin;
-    }
-
-    iterator end() const
-    {
-      return m_end;
-    }
-
-    template<typename OtherIndex>
-    typename iterator::reference operator[](OtherIndex i)
-    {
-      return *(begin() + i);
-    }
-
-  private:
-    iterator m_begin, m_end;
-};
-
-
-template<typename RandomAccessIterator, typename BinaryPredicate>
-  tail_flags<RandomAccessIterator, BinaryPredicate>
-    make_tail_flags(RandomAccessIterator first, RandomAccessIterator last, BinaryPredicate binary_pred)
-{
-  return tail_flags<RandomAccessIterator, BinaryPredicate>(first, last, binary_pred);
-}
-
-
-template<typename RandomAccessIterator>
-  tail_flags<RandomAccessIterator>
-    make_tail_flags(RandomAccessIterator first, RandomAccessIterator last)
-{
-  return tail_flags<RandomAccessIterator>(first, last);
-}
-
-
-} // end detail
-} // end thrust
-
diff --git a/compat/thrust/detail/raw_pointer_cast.h b/compat/thrust/detail/raw_pointer_cast.h
deleted file mode 100644
index 05e1e6bc45..0000000000
--- a/compat/thrust/detail/raw_pointer_cast.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/type_traits/pointer_traits.h>
-
-namespace thrust
-{
-
-template<typename Pointer>
-  inline __host__ __device__ typename thrust::detail::pointer_traits<Pointer>::raw_pointer
-    raw_pointer_cast(const Pointer &ptr)
-{
-  return thrust::detail::pointer_traits<Pointer>::get(ptr);
-} // end raw_pointer_cast()
-
-} // end thrust
-
diff --git a/compat/thrust/detail/raw_reference_cast.h b/compat/thrust/detail/raw_reference_cast.h
deleted file mode 100644
index 1ffd7e5701..0000000000
--- a/compat/thrust/detail/raw_reference_cast.h
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/type_traits/has_nested_type.h>
-#include <thrust/detail/raw_pointer_cast.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/iterator/detail/tuple_of_iterator_references.h>
-
-namespace thrust
-{
-namespace detail
-{
-
-
-__THRUST_DEFINE_HAS_NESTED_TYPE(is_wrapped_reference, wrapped_reference_hint)
-
-namespace raw_reference_detail
-{
-
-template<typename T, typename Enable = void>
-  struct raw_reference
-    : add_reference<T>
-{};
-
-
-// XXX consider making raw_reference<T&> an error
-
-
-template<typename T>
-  struct raw_reference<
-    T,
-    typename thrust::detail::enable_if<
-      is_wrapped_reference<
-        typename remove_cv<T>::type
-      >::value
-    >::type
-  >
-{
-  typedef typename add_reference<
-    typename pointer_element<typename T::pointer>::type
-  >::type type;
-};
-
-} // end raw_reference_ns
-
-template<typename T>
-  struct raw_reference : 
-    raw_reference_detail::raw_reference<T>
-{};
-
-
-// wrapped reference-like things which aren't strictly wrapped references
-// (e.g. tuples of wrapped references) are considered unwrappable
-template<typename T>
-  struct is_unwrappable
-    : is_wrapped_reference<T>
-{};
-
-
-template<typename T, typename Result = void>
-  struct enable_if_unwrappable
-    : enable_if<
-        is_unwrappable<T>::value,
-        Result
-      >
-{};
-
-
-} // end detail
-
-
-template<typename T>
-  inline __host__ __device__ typename detail::raw_reference<T>::type raw_reference_cast(T &ref)
-{
-  return *thrust::raw_pointer_cast(&ref);
-} // end raw_reference_cast
-
-
-template<typename T>
-  inline __host__ __device__ typename detail::raw_reference<const T>::type raw_reference_cast(const T &ref)
-{
-  return *thrust::raw_pointer_cast(&ref);
-} // end raw_reference_cast
-
-
-template<
-  typename T0, typename T1, typename T2,
-  typename T3, typename T4, typename T5,
-  typename T6, typename T7, typename T8,
-  typename T9
->
-inline __host__ __device__
-typename detail::enable_if_unwrappable<
-  thrust::detail::tuple_of_iterator_references<T0,T1,T2,T3,T4,T5,T6,T7,T8,T9>,
-  typename detail::raw_reference<
-    thrust::detail::tuple_of_iterator_references<T0,T1,T2,T3,T4,T5,T6,T7,T8,T9>
-  >::type
->::type
-raw_reference_cast(detail::tuple_of_iterator_references<T0,T1,T2,T3,T4,T5,T6,T7,T8,T9> t);
-
-
-} // end thrust
-
-#include <thrust/detail/raw_reference_cast.inl>
-
diff --git a/compat/thrust/detail/raw_reference_cast.inl b/compat/thrust/detail/raw_reference_cast.inl
deleted file mode 100644
index ea619ec028..0000000000
--- a/compat/thrust/detail/raw_reference_cast.inl
+++ /dev/null
@@ -1,277 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/raw_reference_cast.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/detail/tuple_transform.h>
-
-namespace thrust
-{
-namespace detail
-{
-
-// specialize is_unwrappable
-// a tuple is_unwrappable if any of its elements is_unwrappable
-template<
-  typename T0, typename T1, typename T2,
-  typename T3, typename T4, typename T5,
-  typename T6, typename T7, typename T8,
-  typename T9
->
-  struct is_unwrappable<
-    thrust::tuple<T0,T1,T2,T3,T4,T5,T6,T7,T8,T9>
-  >
-    : or_<
-        is_unwrappable<T0>,
-        is_unwrappable<T1>,
-        is_unwrappable<T2>,
-        is_unwrappable<T3>,
-        is_unwrappable<T4>,
-        is_unwrappable<T5>,
-        is_unwrappable<T6>,
-        is_unwrappable<T7>,
-        is_unwrappable<T8>,
-        is_unwrappable<T9>
-      >
-{};
-
-
-// specialize is_unwrappable
-// a tuple_of_iterator_references is_unwrappable if any of its elements is_unwrappable
-template<
-  typename T0, typename T1, typename T2,
-  typename T3, typename T4, typename T5,
-  typename T6, typename T7, typename T8,
-  typename T9
->
-  struct is_unwrappable<
-    thrust::detail::tuple_of_iterator_references<T0,T1,T2,T3,T4,T5,T6,T7,T8,T9>
-  >
-    : or_<
-        is_unwrappable<T0>,
-        is_unwrappable<T1>,
-        is_unwrappable<T2>,
-        is_unwrappable<T3>,
-        is_unwrappable<T4>,
-        is_unwrappable<T5>,
-        is_unwrappable<T6>,
-        is_unwrappable<T7>,
-        is_unwrappable<T8>,
-        is_unwrappable<T9>
-      >
-{};
-
-
-namespace raw_reference_detail
-{
-
-// unlike raw_reference,
-// raw_reference_tuple_helper needs to return a value
-// when it encounters one, rather than a reference
-// upon encountering tuple, recurse
-//
-// we want the following behavior:
-//  1. T                                -> T
-//  2. T&                               -> T&
-//  3. null_type                        -> null_type
-//  4. reference<T>                     -> T&
-//  5. tuple_of_iterator_references<T>  -> tuple_of_iterator_references<raw_reference_tuple_helper<T>::type>
-
-
-// wrapped references are unwrapped using raw_reference, otherwise, return T
-template<typename T>
-  struct raw_reference_tuple_helper
-    : eval_if<
-        is_unwrappable<
-          typename remove_cv<T>::type
-        >::value,
-        raw_reference<T>,
-        identity_<T>
-      >
-{};
-
-
-// recurse on tuples
-template <
-  typename T0, typename T1, typename T2,
-  typename T3, typename T4, typename T5,
-  typename T6, typename T7, typename T8,
-  typename T9
->
-  struct raw_reference_tuple_helper<
-    thrust::tuple<T0,T1,T2,T3,T4,T5,T6,T7,T8,T9>
-  >
-{
-  typedef thrust::tuple<
-    typename raw_reference_tuple_helper<T0>::type,
-    typename raw_reference_tuple_helper<T1>::type,
-    typename raw_reference_tuple_helper<T2>::type,
-    typename raw_reference_tuple_helper<T3>::type,
-    typename raw_reference_tuple_helper<T4>::type,
-    typename raw_reference_tuple_helper<T5>::type,
-    typename raw_reference_tuple_helper<T6>::type,
-    typename raw_reference_tuple_helper<T7>::type,
-    typename raw_reference_tuple_helper<T8>::type,
-    typename raw_reference_tuple_helper<T9>::type
-  > type;
-};
-
-
-template <
-  typename T0, typename T1, typename T2,
-  typename T3, typename T4, typename T5,
-  typename T6, typename T7, typename T8,
-  typename T9
->
-  struct raw_reference_tuple_helper<
-    thrust::detail::tuple_of_iterator_references<T0,T1,T2,T3,T4,T5,T6,T7,T8,T9>
-  >
-{
-  typedef thrust::detail::tuple_of_iterator_references<
-    typename raw_reference_tuple_helper<T0>::type,
-    typename raw_reference_tuple_helper<T1>::type,
-    typename raw_reference_tuple_helper<T2>::type,
-    typename raw_reference_tuple_helper<T3>::type,
-    typename raw_reference_tuple_helper<T4>::type,
-    typename raw_reference_tuple_helper<T5>::type,
-    typename raw_reference_tuple_helper<T6>::type,
-    typename raw_reference_tuple_helper<T7>::type,
-    typename raw_reference_tuple_helper<T8>::type,
-    typename raw_reference_tuple_helper<T9>::type
-  > type;
-};
-
-
-} // end raw_reference_detail
-
-
-// if a tuple "tuple_type" is_unwrappable,
-//   then the raw_reference of tuple_type is a tuple of its members' raw_references
-//   else the raw_reference of tuple_type is tuple_type &
-template <
-  typename T0, typename T1, typename T2,
-  typename T3, typename T4, typename T5,
-  typename T6, typename T7, typename T8,
-  typename T9
->
-  struct raw_reference<
-    thrust::tuple<T0,T1,T2,T3,T4,T5,T6,T7,T8,T9>
-  >
-{
-  private:
-    typedef thrust::tuple<T0,T1,T2,T3,T4,T5,T6,T7,T8,T9> tuple_type;
-
-  public:
-    typedef typename eval_if<
-      is_unwrappable<tuple_type>::value,
-      raw_reference_detail::raw_reference_tuple_helper<tuple_type>,
-      add_reference<tuple_type>
-    >::type type;
-};
-
-
-template <
-  typename T0, typename T1, typename T2,
-  typename T3, typename T4, typename T5,
-  typename T6, typename T7, typename T8,
-  typename T9
->
-  struct raw_reference<
-    thrust::detail::tuple_of_iterator_references<T0,T1,T2,T3,T4,T5,T6,T7,T8,T9>
-  >
-{
-  private:
-    typedef detail::tuple_of_iterator_references<T0,T1,T2,T3,T4,T5,T6,T7,T8,T9> tuple_type;
-
-  public:
-    typedef typename raw_reference_detail::raw_reference_tuple_helper<tuple_type>::type type;
-
-    // XXX figure out why is_unwrappable seems to be broken for tuple_of_iterator_references
-    //typedef typename eval_if<
-    //  is_unwrappable<tuple_type>::value,
-    //  raw_reference_detail::raw_reference_tuple_helper<tuple_type>,
-    //  add_reference<tuple_type>
-    //>::type type;
-};
-
-
-struct raw_reference_caster
-{
-  template<typename T>
-  __host__ __device__
-  typename detail::raw_reference<T>::type operator()(T &ref)
-  {
-    return thrust::raw_reference_cast(ref);
-  }
-
-  template<typename T>
-  __host__ __device__
-  typename detail::raw_reference<const T>::type operator()(const T &ref)
-  {
-    return thrust::raw_reference_cast(ref);
-  }
-
-  template<
-    typename T0, typename T1, typename T2,
-    typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8,
-    typename T9
-  >
-  __host__ __device__
-  typename detail::raw_reference<
-    thrust::detail::tuple_of_iterator_references<T0,T1,T2,T3,T4,T5,T6,T7,T8,T9>
-  >::type
-  operator()(thrust::detail::tuple_of_iterator_references<T0,T1,T2,T3,T4,T5,T6,T7,T8,T9> t,
-             typename enable_if<
-               is_unwrappable<thrust::detail::tuple_of_iterator_references<T0,T1,T2,T3,T4,T5,T6,T7,T8,T9> >::value
-             >::type * = 0)
-  {
-    return thrust::raw_reference_cast(t);
-  }
-}; // end raw_reference_caster
-
-
-} // end detail
-
-
-template<
-  typename T0, typename T1, typename T2,
-  typename T3, typename T4, typename T5,
-  typename T6, typename T7, typename T8,
-  typename T9
->
-__host__ __device__
-typename detail::enable_if_unwrappable<
-  thrust::detail::tuple_of_iterator_references<T0,T1,T2,T3,T4,T5,T6,T7,T8,T9>,
-  typename detail::raw_reference<
-    thrust::detail::tuple_of_iterator_references<T0,T1,T2,T3,T4,T5,T6,T7,T8,T9>
-  >::type
->::type
-raw_reference_cast(thrust::detail::tuple_of_iterator_references<T0,T1,T2,T3,T4,T5,T6,T7,T8,T9> t)
-{
-  thrust::detail::raw_reference_caster f;
-
-  // note that we pass raw_reference_tuple_helper, not raw_reference as the unary metafunction
-  // the subtle difference is important
-  return thrust::detail::tuple_host_device_transform<detail::raw_reference_detail::raw_reference_tuple_helper>(t, f);
-} // end raw_reference_cast
-
-
-} // end thrust
-
diff --git a/compat/thrust/detail/reduce.inl b/compat/thrust/detail/reduce.inl
deleted file mode 100644
index ba84423475..0000000000
--- a/compat/thrust/detail/reduce.inl
+++ /dev/null
@@ -1,261 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file reduce.inl
- *  \brief Inline file for reduce.h.
- */
-
-#include <thrust/reduce.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/system/detail/generic/select_system.h>
-#include <thrust/system/detail/generic/reduce.h>
-#include <thrust/system/detail/generic/reduce_by_key.h>
-#include <thrust/system/detail/adl/reduce.h>
-#include <thrust/system/detail/adl/reduce_by_key.h>
-
-namespace thrust
-{
-
-
-template<typename DerivedPolicy, typename InputIterator>
-  typename thrust::iterator_traits<InputIterator>::value_type
-    reduce(const thrust::detail::execution_policy_base<DerivedPolicy> &exec, InputIterator first, InputIterator last)
-{
-  using thrust::system::detail::generic::reduce;
-  return reduce(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last);
-} // end reduce()
-
-
-template<typename DerivedPolicy, typename InputIterator, typename T>
-  T reduce(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-           InputIterator first,
-           InputIterator last,
-           T init)
-{
-  using thrust::system::detail::generic::reduce;
-  return reduce(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, init);
-} // end reduce()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename T,
-         typename BinaryFunction>
-  T reduce(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-           InputIterator first,
-           InputIterator last,
-           T init,
-           BinaryFunction binary_op)
-{
-  using thrust::system::detail::generic::reduce;
-  return reduce(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, init, binary_op);
-} // end reduce()
-
-
-template <typename DerivedPolicy,
-          typename InputIterator1,
-          typename InputIterator2,
-          typename OutputIterator1,
-          typename OutputIterator2>
-  thrust::pair<OutputIterator1,OutputIterator2>
-  reduce_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                InputIterator1 keys_first, 
-                InputIterator1 keys_last,
-                InputIterator2 values_first,
-                OutputIterator1 keys_output,
-                OutputIterator2 values_output)
-{
-  using thrust::system::detail::generic::reduce_by_key;
-  return reduce_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first, keys_last, values_first, keys_output, values_output);
-} // end reduce_by_key()
-
-
-template <typename DerivedPolicy,
-          typename InputIterator1,
-          typename InputIterator2,
-          typename OutputIterator1,
-          typename OutputIterator2,
-          typename BinaryPredicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-  reduce_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                InputIterator1 keys_first, 
-                InputIterator1 keys_last,
-                InputIterator2 values_first,
-                OutputIterator1 keys_output,
-                OutputIterator2 values_output,
-                BinaryPredicate binary_pred)
-{
-  using thrust::system::detail::generic::reduce_by_key;
-  return reduce_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first, keys_last, values_first, keys_output, values_output, binary_pred);
-} // end reduce_by_key()
-
-
-template <typename DerivedPolicy,
-          typename InputIterator1,
-          typename InputIterator2,
-          typename OutputIterator1,
-          typename OutputIterator2,
-          typename BinaryPredicate,
-          typename BinaryFunction>
-  thrust::pair<OutputIterator1,OutputIterator2>
-  reduce_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                InputIterator1 keys_first, 
-                InputIterator1 keys_last,
-                InputIterator2 values_first,
-                OutputIterator1 keys_output,
-                OutputIterator2 values_output,
-                BinaryPredicate binary_pred,
-                BinaryFunction binary_op)
-{
-  using thrust::system::detail::generic::reduce_by_key;
-  return reduce_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first, keys_last, values_first, keys_output, values_output, binary_pred, binary_op);
-} // end reduce_by_key()
-
-
-template<typename InputIterator>
-typename thrust::iterator_traits<InputIterator>::value_type
-  reduce(InputIterator first,
-         InputIterator last)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator>::type System;
-
-  System system;
-
-  return thrust::reduce(select_system(system), first, last);
-}
-
-template<typename InputIterator,
-         typename T>
-   T reduce(InputIterator first,
-            InputIterator last,
-            T init)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator>::type System;
-
-  System system;
-
-  return thrust::reduce(select_system(system), first, last, init);
-}
-
-
-template<typename InputIterator,
-         typename T,
-         typename BinaryFunction>
-   T reduce(InputIterator first,
-            InputIterator last,
-            T init,
-            BinaryFunction binary_op)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator>::type System;
-
-  System system;
-
-  return thrust::reduce(select_system(system), first, last, init, binary_op);
-}
-
-template <typename InputIterator1,
-          typename InputIterator2,
-          typename OutputIterator1,
-          typename OutputIterator2>
-  thrust::pair<OutputIterator1,OutputIterator2>
-  reduce_by_key(InputIterator1 keys_first, 
-                InputIterator1 keys_last,
-                InputIterator2 values_first,
-                OutputIterator1 keys_output,
-                OutputIterator2 values_output)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type  System1;
-  typedef typename thrust::iterator_system<InputIterator2>::type  System2;
-  typedef typename thrust::iterator_system<OutputIterator1>::type System3;
-  typedef typename thrust::iterator_system<OutputIterator2>::type System4;
-
-  System1 system1;
-  System2 system2;
-  System3 system3;
-  System4 system4;
-
-  return thrust::reduce_by_key(select_system(system1,system2,system3,system4), keys_first, keys_last, values_first, keys_output, values_output);
-}
-
-template <typename InputIterator1,
-          typename InputIterator2,
-          typename OutputIterator1,
-          typename OutputIterator2,
-          typename BinaryPredicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-  reduce_by_key(InputIterator1 keys_first, 
-                InputIterator1 keys_last,
-                InputIterator2 values_first,
-                OutputIterator1 keys_output,
-                OutputIterator2 values_output,
-                BinaryPredicate binary_pred)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type  System1;
-  typedef typename thrust::iterator_system<InputIterator2>::type  System2;
-  typedef typename thrust::iterator_system<OutputIterator1>::type System3;
-  typedef typename thrust::iterator_system<OutputIterator2>::type System4;
-
-  System1 system1;
-  System2 system2;
-  System3 system3;
-  System4 system4;
-
-  return thrust::reduce_by_key(select_system(system1,system2,system3,system4), keys_first, keys_last, values_first, keys_output, values_output, binary_pred);
-}
-
-template <typename InputIterator1,
-          typename InputIterator2,
-          typename OutputIterator1,
-          typename OutputIterator2,
-          typename BinaryPredicate,
-          typename BinaryFunction>
-  thrust::pair<OutputIterator1,OutputIterator2>
-  reduce_by_key(InputIterator1 keys_first, 
-                InputIterator1 keys_last,
-                InputIterator2 values_first,
-                OutputIterator1 keys_output,
-                OutputIterator2 values_output,
-                BinaryPredicate binary_pred,
-                BinaryFunction binary_op)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type  System1;
-  typedef typename thrust::iterator_system<InputIterator2>::type  System2;
-  typedef typename thrust::iterator_system<OutputIterator1>::type System3;
-  typedef typename thrust::iterator_system<OutputIterator2>::type System4;
-
-  System1 system1;
-  System2 system2;
-  System3 system3;
-  System4 system4;
-
-  return thrust::reduce_by_key(select_system(system1,system2,system3,system4), keys_first, keys_last, values_first, keys_output, values_output, binary_pred, binary_op);
-}
-
-} // end namespace thrust
-
diff --git a/compat/thrust/detail/reference.h b/compat/thrust/detail/reference.h
deleted file mode 100644
index 8c0b06186f..0000000000
--- a/compat/thrust/detail/reference.h
+++ /dev/null
@@ -1,167 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/detail/use_default.h>
-#include <thrust/detail/reference_forward_declaration.h>
-
-
-namespace thrust
-{
-namespace detail
-{
-
-template<typename> struct is_wrapped_reference;
-
-}
-
-// the base type for all of thrust's system-annotated references.
-// for reasonable reference-like semantics, derived types must reimplement the following:
-// 1. constructor from pointer
-// 2. copy constructor
-// 3. templated copy constructor from other reference
-// 4. templated assignment from other reference
-// 5. assignment from value_type
-template<typename Element, typename Pointer, typename Derived>
-  class reference
-{
-  private:
-    typedef typename thrust::detail::eval_if<
-      thrust::detail::is_same<Derived,use_default>::value,
-      thrust::detail::identity_<reference>,
-      thrust::detail::identity_<Derived>
-    >::type derived_type;
-
-    // hint for is_wrapped_reference lets it know that this type (or a derived type)
-    // is a wrapped reference
-    struct wrapped_reference_hint {};
-    template<typename> friend struct thrust::detail::is_wrapped_reference;
-
-  public:
-    typedef Pointer                                              pointer;
-    typedef typename thrust::detail::remove_const<Element>::type value_type;
-
-    __host__ __device__
-    explicit reference(const pointer &ptr);
-
-    template<typename OtherElement, typename OtherPointer, typename OtherDerived>
-    __host__ __device__
-    reference(const reference<OtherElement,OtherPointer,OtherDerived> &other,
-              typename thrust::detail::enable_if_convertible<
-                typename reference<OtherElement,OtherPointer,OtherDerived>::pointer,
-                pointer
-              >::type * = 0);
-
-    __host__ __device__
-    derived_type &operator=(const reference &other);
-
-    // XXX this may need an enable_if
-    template<typename OtherElement, typename OtherPointer, typename OtherDerived>
-    __host__ __device__
-    derived_type &operator=(const reference<OtherElement,OtherPointer,OtherDerived> &other);
-
-    __host__ __device__
-    derived_type &operator=(const value_type &x);
-
-    __host__ __device__
-    pointer operator&() const;
-
-    __host__ __device__
-    operator value_type () const;
-
-    __host__ __device__
-    void swap(derived_type &other);
-
-    derived_type &operator++();
-
-    value_type operator++(int);
-
-    // XXX parameterize the type of rhs
-    derived_type &operator+=(const value_type &rhs);
-
-    derived_type &operator--();
-
-    value_type operator--(int);
-
-    // XXX parameterize the type of rhs
-    derived_type &operator-=(const value_type &rhs);
-
-    // XXX parameterize the type of rhs
-    derived_type &operator*=(const value_type &rhs);
-
-    // XXX parameterize the type of rhs
-    derived_type &operator/=(const value_type &rhs);
-
-    // XXX parameterize the type of rhs
-    derived_type &operator%=(const value_type &rhs);
-
-    // XXX parameterize the type of rhs
-    derived_type &operator<<=(const value_type &rhs);
-
-    // XXX parameterize the type of rhs
-    derived_type &operator>>=(const value_type &rhs);
-
-    // XXX parameterize the type of rhs
-    derived_type &operator&=(const value_type &rhs);
-
-    // XXX parameterize the type of rhs
-    derived_type &operator|=(const value_type &rhs);
-
-    // XXX parameterize the type of rhs
-    derived_type &operator^=(const value_type &rhs);
-
-  private:
-    const pointer m_ptr;
-
-    // allow access to m_ptr for other references
-    template <typename OtherElement, typename OtherPointer, typename OtherDerived> friend class reference;
-
-    template<typename System>
-    __host__ __device__
-    inline value_type strip_const_get_value(const System &system) const;
-
-    template<typename OtherPointer>
-    __host__ __device__
-    inline void assign_from(OtherPointer src);
-
-    // XXX this helper exists only to avoid warnings about null references from the other assign_from
-    template<typename System1, typename System2, typename OtherPointer>
-    inline __host__ __device__
-    void assign_from(System1 *system1, System2 *system2, OtherPointer src);
-
-    template<typename System, typename OtherPointer>
-    __host__ __device__
-    inline void strip_const_assign_value(const System &system, OtherPointer src);
-
-    // XXX this helper exists only to avoid warnings about null references from the other swap
-    template<typename System>
-    inline __host__ __device__
-    void swap(System *system, derived_type &other);
-
-    // XXX this helper exists only to avoid warnings about null references from operator value_type ()
-    template<typename System>
-    inline __host__ __device__
-    value_type convert_to_value_type(System *system) const;
-}; // end reference
-
-  
-} // end thrust
-
-#include <thrust/detail/reference.inl>
-
diff --git a/compat/thrust/detail/reference.inl b/compat/thrust/detail/reference.inl
deleted file mode 100644
index 8b55edb712..0000000000
--- a/compat/thrust/detail/reference.inl
+++ /dev/null
@@ -1,361 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/reference.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/system/detail/generic/select_system.h>
-#include <thrust/system/detail/generic/memory.h>
-#include <thrust/system/detail/adl/get_value.h>
-#include <thrust/system/detail/adl/assign_value.h>
-#include <thrust/system/detail/adl/iter_swap.h>
-
-
-namespace thrust
-{
-
-
-template<typename Element, typename Pointer, typename Derived>
-  template<typename OtherElement, typename OtherPointer, typename OtherDerived>
-    reference<Element,Pointer,Derived>
-      ::reference(const reference<OtherElement,OtherPointer,OtherDerived> &other,
-                  typename thrust::detail::enable_if_convertible<
-                    typename reference<OtherElement,OtherPointer,OtherDerived>::pointer,
-                    pointer
-                  >::type *)
-        : m_ptr(other.m_ptr)
-{}
-
-
-template<typename Element, typename Pointer, typename Derived>
-  reference<Element,Pointer,Derived>
-    ::reference(const pointer &ptr)
-      : m_ptr(ptr)
-{}
-
-
-template<typename Element, typename Pointer, typename Derived>
-  typename reference<Element,Pointer,Derived>::pointer
-    reference<Element,Pointer,Derived>
-      ::operator&() const
-{
-  return m_ptr;
-} // end reference::operator&()
-
-
-template<typename Element, typename Pointer, typename Derived>
-  typename reference<Element,Pointer,Derived>::derived_type &
-    reference<Element,Pointer,Derived>
-      ::operator=(const value_type &v)
-{
-  assign_from(&v);
-  return static_cast<derived_type&>(*this);
-} // end reference::operator=()
-
-
-template<typename Element, typename Pointer, typename Derived>
-  typename reference<Element,Pointer,Derived>::derived_type &
-    reference<Element,Pointer,Derived>
-      ::operator=(const reference &other)
-{
-  assign_from(&other); 
-  return static_cast<derived_type&>(*this);
-} // end reference::operator=()
-
-
-template<typename Element, typename Pointer, typename Derived>
-  template<typename OtherElement, typename OtherPointer, typename OtherDerived>
-    typename reference<Element,Pointer,Derived>::derived_type &
-      reference<Element,Pointer,Derived>
-        ::operator=(const reference<OtherElement,OtherPointer,OtherDerived> &other)
-{
-  assign_from(&other);
-  return static_cast<derived_type&>(*this);
-} // end reference::operator=()
-
-
-template<typename Element, typename Pointer, typename Derived>
-  template<typename System>
-    typename reference<Element,Pointer,Derived>::value_type
-      reference<Element,Pointer,Derived>
-        ::convert_to_value_type(System *system) const
-{
-  using thrust::system::detail::generic::select_system;
-  return strip_const_get_value(select_system(*system));
-} // end convert_to_value_type()
-
-
-template<typename Element, typename Pointer, typename Derived>
-  reference<Element,Pointer,Derived>
-    ::operator typename reference<Element,Pointer,Derived>::value_type () const
-{
-  typedef typename thrust::iterator_system<pointer>::type System;
-
-  // XXX avoid default-constructing a system
-  // XXX use null a reference for dispatching
-  // XXX this assumes that the eventual invocation of
-  // XXX get_value will not access system state
-  System *system = 0;
-
-  return convert_to_value_type(system);
-} // end reference::operator value_type ()
-
-
-template<typename Element, typename Pointer, typename Derived>
-  template<typename System>
-    typename reference<Element,Pointer,Derived>::value_type
-      reference<Element,Pointer,Derived>
-        ::strip_const_get_value(const System &system) const
-{
-  System &non_const_system = const_cast<System&>(system);
-
-  using thrust::system::detail::generic::get_value;
-
-  return get_value(thrust::detail::derived_cast(non_const_system), m_ptr);
-} // end reference::strip_const_get_value()
-
-
-template<typename Element, typename Pointer, typename Derived>
-  template<typename System1, typename System2, typename OtherPointer>
-    void reference<Element,Pointer,Derived>
-      ::assign_from(System1 *system1, System2 *system2, OtherPointer src)
-{
-  using thrust::system::detail::generic::select_system;
-
-  strip_const_assign_value(select_system(*system1, *system2), src);
-} // end assign_from()
-
-
-template<typename Element, typename Pointer, typename Derived>
-  template<typename OtherPointer>
-    void reference<Element,Pointer,Derived>
-      ::assign_from(OtherPointer src)
-{
-  typedef typename thrust::iterator_system<pointer>::type      System1;
-  typedef typename thrust::iterator_system<OtherPointer>::type System2;
-
-  // XXX avoid default-constructing a system
-  // XXX use null references for dispatching
-  // XXX this assumes that the eventual invocation of
-  // XXX assign_value will not access system state
-  System1 *system1 = 0;
-  System2 *system2 = 0;
-
-  assign_from(system1, system2, src);
-} // end assign_from()
-
-
-template<typename Element, typename Pointer, typename Derived>
-  template<typename System, typename OtherPointer>
-    void reference<Element,Pointer,Derived>
-      ::strip_const_assign_value(const System &system, OtherPointer src)
-{
-  System &non_const_system = const_cast<System&>(system);
-
-  using thrust::system::detail::generic::assign_value;
-
-  assign_value(thrust::detail::derived_cast(non_const_system), m_ptr, src);
-} // end strip_const_assign_value()
-
-
-template<typename Element, typename Pointer, typename Derived>
-  template<typename System>
-    void reference<Element,Pointer,Derived>
-      ::swap(System *system, derived_type &other)
-{
-  using thrust::system::detail::generic::select_system;
-  using thrust::system::detail::generic::iter_swap;
-
-  iter_swap(select_system(*system, *system), m_ptr, other.m_ptr);
-} // end reference::swap()
-
-
-template<typename Element, typename Pointer, typename Derived>
-  void reference<Element,Pointer,Derived>
-    ::swap(derived_type &other)
-{
-  typedef typename thrust::iterator_system<pointer>::type System;
-
-  // XXX avoid default-constructing a system
-  // XXX use null references for dispatching
-  // XXX this assumes that the eventual invocation
-  // XXX of iter_swap will not access system state
-  System *system = 0;
-
-  swap(system, other);
-} // end reference::swap()
-
-
-template<typename Element, typename Pointer, typename Derived>
-  typename reference<Element,Pointer,Derived>::derived_type &
-    reference<Element,Pointer,Derived>
-      ::operator++(void)
-{
-  value_type temp = *this;
-  ++temp;
-  *this = temp;
-  return static_cast<derived_type&>(*this);
-} // end reference::operator++()
-
-
-template<typename Element, typename Pointer, typename Derived>
-  typename reference<Element,Pointer,Derived>::value_type
-    reference<Element,Pointer,Derived>
-      ::operator++(int)
-{
-  value_type temp = *this;
-  value_type result = temp++;
-  *this = temp;
-  return result;
-} // end reference::operator++()
-
-
-template<typename Element, typename Pointer, typename Derived>
-  typename reference<Element,Pointer,Derived>::derived_type &
-    reference<Element,Pointer,Derived>
-      ::operator+=(const value_type &rhs)
-{
-  value_type temp = *this;
-  temp += rhs;
-  *this = temp;
-  return static_cast<derived_type&>(*this);
-} // end reference::operator+=()
-
-template<typename Element, typename Pointer, typename Derived>
-  typename reference<Element,Pointer,Derived>::derived_type &
-    reference<Element,Pointer,Derived>
-      ::operator--(void)
-{
-  value_type temp = *this;
-  --temp;
-  *this = temp;
-  return static_cast<derived_type&>(*this);
-} // end reference::operator--()
-
-template<typename Element, typename Pointer, typename Derived>
-  typename reference<Element,Pointer,Derived>::value_type
-    reference<Element,Pointer,Derived>
-      ::operator--(int)
-{
-  value_type temp = *this;
-  value_type result = temp--;
-  *this = temp;
-  return result;
-} // end reference::operator--()
-
-template<typename Element, typename Pointer, typename Derived>
-  typename reference<Element,Pointer,Derived>::derived_type &
-    reference<Element,Pointer,Derived>
-      ::operator-=(const value_type &rhs)
-{
-  value_type temp = *this;
-  temp -= rhs;
-  *this = temp;
-  return static_cast<derived_type&>(*this);
-} // end reference::operator-=()
-
-template<typename Element, typename Pointer, typename Derived>
-  typename reference<Element,Pointer,Derived>::derived_type &
-    reference<Element,Pointer,Derived>
-      ::operator*=(const value_type &rhs)
-{
-  value_type temp = *this;
-  temp *= rhs;
-  *this = temp;
-  return static_cast<derived_type&>(*this);
-} // end reference::operator*=()
-
-template<typename Element, typename Pointer, typename Derived>
-  typename reference<Element,Pointer,Derived>::derived_type &
-    reference<Element,Pointer,Derived>
-      ::operator/=(const value_type &rhs)
-{
-  value_type temp = *this;
-  temp /= rhs;
-  *this = temp;
-  return static_cast<derived_type&>(*this);
-} // end reference::operator/=()
-
-template<typename Element, typename Pointer, typename Derived>
-  typename reference<Element,Pointer,Derived>::derived_type &
-    reference<Element,Pointer,Derived>
-      ::operator%=(const value_type &rhs)
-{
-  value_type temp = *this;
-  temp %= rhs;
-  *this = temp;
-  return static_cast<derived_type&>(*this);
-} // end reference::operator%=()
-
-template<typename Element, typename Pointer, typename Derived>
-  typename reference<Element,Pointer,Derived>::derived_type &
-    reference<Element,Pointer,Derived>
-      ::operator<<=(const value_type &rhs)
-{
-  value_type temp = *this;
-  temp <<= rhs;
-  *this = temp;
-  return static_cast<derived_type&>(*this);
-} // end reference::operator<<=()
-
-template<typename Element, typename Pointer, typename Derived>
-  typename reference<Element,Pointer,Derived>::derived_type &
-    reference<Element,Pointer,Derived>
-      ::operator>>=(const value_type &rhs)
-{
-  value_type temp = *this;
-  temp >>= rhs;
-  *this = temp;
-  return static_cast<derived_type&>(*this);
-} // end reference::operator>>=()
-
-template<typename Element, typename Pointer, typename Derived>
-  typename reference<Element,Pointer,Derived>::derived_type &
-    reference<Element,Pointer,Derived>
-      ::operator&=(const value_type &rhs)
-{
-  value_type temp = *this;
-  temp &= rhs;
-  *this = temp;
-  return static_cast<derived_type&>(*this);
-} // end reference::operator&=()
-
-template<typename Element, typename Pointer, typename Derived>
-  typename reference<Element,Pointer,Derived>::derived_type &
-    reference<Element,Pointer,Derived>
-      ::operator|=(const value_type &rhs)
-{
-  value_type temp = *this;
-  temp |= rhs;
-  *this = temp;
-  return static_cast<derived_type&>(*this);
-} // end reference::operator|=()
-
-template<typename Element, typename Pointer, typename Derived>
-  typename reference<Element,Pointer,Derived>::derived_type &
-    reference<Element,Pointer,Derived>
-      ::operator^=(const value_type &rhs)
-{
-  value_type temp = *this;
-  temp ^= rhs;
-  *this = temp;
-  return static_cast<derived_type&>(*this);
-} // end reference::operator^=()
-
-  
-} // end thrust
-
diff --git a/compat/thrust/detail/reference_forward_declaration.h b/compat/thrust/detail/reference_forward_declaration.h
deleted file mode 100644
index 60524d3ec2..0000000000
--- a/compat/thrust/detail/reference_forward_declaration.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/use_default.h>
-
-namespace thrust
-{
-
-template<typename Element, typename Pointer, typename Derived = use_default> class reference;
-
-} // end thrust
-
diff --git a/compat/thrust/detail/remove.inl b/compat/thrust/detail/remove.inl
deleted file mode 100644
index 5675243fbd..0000000000
--- a/compat/thrust/detail/remove.inl
+++ /dev/null
@@ -1,238 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file remove.inl
- *  \brief Inline file for remove.h.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/remove.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/system/detail/generic/select_system.h>
-#include <thrust/system/detail/generic/remove.h>
-#include <thrust/system/detail/adl/remove.h>
-
-namespace thrust
-{
-
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename T>
-  ForwardIterator remove(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                         ForwardIterator first,
-                         ForwardIterator last,
-                         const T &value)
-{
-  using thrust::system::detail::generic::remove;
-  return remove(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, value);
-} // end remove()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename T>
-  OutputIterator remove_copy(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                             InputIterator first,
-                             InputIterator last,
-                             OutputIterator result,
-                             const T &value)
-{
-  using thrust::system::detail::generic::remove_copy;
-  return remove_copy(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, value);
-} // end remove_copy()
-
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename Predicate>
-  ForwardIterator remove_if(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                            ForwardIterator first,
-                            ForwardIterator last,
-                            Predicate pred)
-{
-  using thrust::system::detail::generic::remove_if;
-  return remove_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, pred);
-} // end remove_if()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename Predicate>
-  OutputIterator remove_copy_if(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                InputIterator first,
-                                InputIterator last,
-                                OutputIterator result,
-                                Predicate pred)
-{
-  using thrust::system::detail::generic::remove_copy_if;
-  return remove_copy_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, pred);
-} // end remove_copy_if()
-
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename InputIterator,
-         typename Predicate>
-  ForwardIterator remove_if(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                            ForwardIterator first,
-                            ForwardIterator last,
-                            InputIterator stencil,
-                            Predicate pred)
-{
-  using thrust::system::detail::generic::remove_if;
-  return remove_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, stencil, pred);
-} // end remove_if()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename Predicate>
-  OutputIterator remove_copy_if(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                InputIterator1 first,
-                                InputIterator1 last,
-                                InputIterator2 stencil,
-                                OutputIterator result,
-                                Predicate pred)
-{
-  using thrust::system::detail::generic::remove_copy_if;
-  return remove_copy_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, stencil, result, pred);
-} // end remove_copy_if()
-
-
-template<typename ForwardIterator,
-         typename T>
-  ForwardIterator remove(ForwardIterator first,
-                         ForwardIterator last,
-                         const T &value)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<ForwardIterator>::type System;
-
-  System system;
-
-  return thrust::remove(select_system(system), first, last, value);
-} // end remove()
-
-
-template<typename InputIterator,
-         typename OutputIterator,
-         typename T>
-  OutputIterator remove_copy(InputIterator first,
-                             InputIterator last,
-                             OutputIterator result,
-                             const T &value)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator>::type  System1;
-  typedef typename thrust::iterator_system<OutputIterator>::type System2;
-
-  System1 system1;
-  System2 system2;
-
-  return thrust::remove_copy(select_system(system1,system2), first, last, result, value);
-} // end remove_copy()
-
-
-template<typename ForwardIterator,
-         typename Predicate>
-  ForwardIterator remove_if(ForwardIterator first,
-                            ForwardIterator last,
-                            Predicate pred)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<ForwardIterator>::type System;
-
-  System system;
-
-  return thrust::remove_if(select_system(system), first, last, pred);
-} // end remove_if()
-
-
-template<typename ForwardIterator,
-         typename InputIterator,
-         typename Predicate>
-  ForwardIterator remove_if(ForwardIterator first,
-                            ForwardIterator last,
-                            InputIterator stencil,
-                            Predicate pred)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<ForwardIterator>::type System1;
-  typedef typename thrust::iterator_system<InputIterator>::type   System2;
-
-  System1 system1;
-  System2 system2;
-
-  return thrust::remove_if(select_system(system1,system2), first, last, stencil, pred);
-} // end remove_if()
-
-
-template<typename InputIterator,
-         typename OutputIterator,
-         typename Predicate>
-  OutputIterator remove_copy_if(InputIterator first,
-                                InputIterator last,
-                                OutputIterator result,
-                                Predicate pred)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator>::type  System1;
-  typedef typename thrust::iterator_system<OutputIterator>::type System2;
-
-  System1 system1;
-  System2 system2;
-
-  return thrust::remove_copy_if(select_system(system1,system2), first, last, result, pred);
-} // end remove_copy_if()
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename Predicate>
-  OutputIterator remove_copy_if(InputIterator1 first,
-                                InputIterator1 last,
-                                InputIterator2 stencil,
-                                OutputIterator result,
-                                Predicate pred)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type System1;
-  typedef typename thrust::iterator_system<InputIterator2>::type System2;
-  typedef typename thrust::iterator_system<OutputIterator>::type System3;
-
-  System1 system1;
-  System2 system2;
-  System3 system3;
-
-  return thrust::remove_copy_if(select_system(system1,system2,system3), first, last, stencil, result, pred);
-} // end remove_copy_if()
-
-
-} // end namespace thrust
-
diff --git a/compat/thrust/detail/replace.inl b/compat/thrust/detail/replace.inl
deleted file mode 100644
index 1eaf24d621..0000000000
--- a/compat/thrust/detail/replace.inl
+++ /dev/null
@@ -1,210 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file replace.inl
- *  \brief Inline file for replace.h.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/replace.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/system/detail/generic/select_system.h>
-#include <thrust/system/detail/generic/replace.h>
-#include <thrust/system/detail/adl/replace.h>
-
-namespace thrust
-{
-
-
-template<typename DerivedPolicy, typename ForwardIterator, typename T>
-  void replace(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-               ForwardIterator first, ForwardIterator last,
-               const T &old_value,
-               const T &new_value)
-{
-  using thrust::system::detail::generic::replace;
-  return replace(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, old_value, new_value);
-} // end replace()
-
-
-template<typename DerivedPolicy, typename ForwardIterator, typename Predicate, typename T>
-  void replace_if(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                  ForwardIterator first, ForwardIterator last,
-                  Predicate pred,
-                  const T &new_value)
-{
-  using thrust::system::detail::generic::replace_if;
-  return replace_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, pred, new_value);
-} // end replace_if()
-
-
-template<typename DerivedPolicy, typename ForwardIterator, typename InputIterator, typename Predicate, typename T>
-  void replace_if(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                  ForwardIterator first, ForwardIterator last,
-                  InputIterator stencil,
-                  Predicate pred,
-                  const T &new_value)
-{
-  using thrust::system::detail::generic::replace_if;
-  return replace_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, stencil, pred, new_value);
-} // end replace_if()
-
-
-template<typename DerivedPolicy, typename InputIterator, typename OutputIterator, typename T>
-  OutputIterator replace_copy(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                              InputIterator first, InputIterator last,
-                              OutputIterator result,
-                              const T &old_value,
-                              const T &new_value)
-{
-  using thrust::system::detail::generic::replace_copy;
-  return replace_copy(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, old_value, new_value);
-} // end replace_copy()
-
-
-template<typename DerivedPolicy, typename InputIterator, typename OutputIterator, typename Predicate, typename T>
-  OutputIterator replace_copy_if(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                 InputIterator first, InputIterator last,
-                                 OutputIterator result,
-                                 Predicate pred,
-                                 const T &new_value)
-{
-  using thrust::system::detail::generic::replace_copy_if;
-  return replace_copy_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, pred, new_value);
-} // end replace_copy_if()
-
-
-template<typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename OutputIterator, typename Predicate, typename T>
-  OutputIterator replace_copy_if(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                 InputIterator1 first, InputIterator1 last,
-                                 InputIterator2 stencil,
-                                 OutputIterator result,
-                                 Predicate pred,
-                                 const T &new_value)
-{
-  using thrust::system::detail::generic::replace_copy_if;
-  return replace_copy_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, stencil, result, pred, new_value);
-} // end replace_copy_if()
-
-
-template<typename InputIterator, typename OutputIterator, typename Predicate, typename T>
-  OutputIterator replace_copy_if(InputIterator first, InputIterator last,
-                                 OutputIterator result,
-                                 Predicate pred,
-                                 const T &new_value)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator>::type  System1;
-  typedef typename thrust::iterator_system<OutputIterator>::type System2;
-
-  System1 system1;
-  System2 system2;
-
-  return thrust::replace_copy_if(select_system(system1,system2), first, last, result, pred, new_value);
-} // end replace_copy_if()
-
-
-template<typename InputIterator1, typename InputIterator2, typename OutputIterator, typename Predicate, typename T>
-  OutputIterator replace_copy_if(InputIterator1 first, InputIterator1 last,
-                                 InputIterator2 stencil,
-                                 OutputIterator result,
-                                 Predicate pred,
-                                 const T &new_value)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type System1;
-  typedef typename thrust::iterator_system<InputIterator2>::type System2;
-  typedef typename thrust::iterator_system<OutputIterator>::type System3;
-
-  System1 system1;
-  System2 system2;
-  System3 system3;
-
-  return thrust::replace_copy_if(select_system(system1,system2,system3), first, last, stencil, result, pred, new_value);
-} // end replace_copy_if()
-
-
-template<typename InputIterator, typename OutputIterator, typename T>
-  OutputIterator replace_copy(InputIterator first, InputIterator last,
-                              OutputIterator result,
-                              const T &old_value,
-                              const T &new_value)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator>::type  System1;
-  typedef typename thrust::iterator_system<OutputIterator>::type System2;
-
-  System1 system1;
-  System2 system2;
-
-  return thrust::replace_copy(select_system(system1,system2), first, last, result, old_value, new_value);
-} // end replace_copy()
-
-
-template<typename ForwardIterator, typename Predicate, typename T>
-  void replace_if(ForwardIterator first, ForwardIterator last,
-                  Predicate pred,
-                  const T &new_value)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<ForwardIterator>::type System;
-
-  System system;
-
-  return thrust::replace_if(select_system(system), first, last, pred, new_value);
-} // end replace_if()
-
-
-template<typename ForwardIterator, typename InputIterator, typename Predicate, typename T>
-  void replace_if(ForwardIterator first, ForwardIterator last,
-                  InputIterator stencil,
-                  Predicate pred,
-                  const T &new_value)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<ForwardIterator>::type System1;
-  typedef typename thrust::iterator_system<InputIterator>::type   System2;
-
-  System1 system1;
-  System2 system2;
-
-  return thrust::replace_if(select_system(system1,system2), first, last, stencil, pred, new_value);
-} // end replace_if()
-
-
-template<typename ForwardIterator, typename T>
-  void replace(ForwardIterator first, ForwardIterator last,
-               const T &old_value,
-               const T &new_value)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<ForwardIterator>::type System;
-
-  System system;
-
-  return thrust::replace(select_system(system), first, last, old_value, new_value);
-} // end replace()
-
-
-} // end namespace thrust
-
diff --git a/compat/thrust/detail/reverse.inl b/compat/thrust/detail/reverse.inl
deleted file mode 100644
index 18c26c00e6..0000000000
--- a/compat/thrust/detail/reverse.inl
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file reverse.inl
- *  \brief Inline file for reverse.h.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/reverse.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/system/detail/generic/select_system.h>
-#include <thrust/system/detail/generic/reverse.h>
-#include <thrust/system/detail/adl/reverse.h>
-
-namespace thrust
-{
-
-
-template<typename DerivedPolicy, typename BidirectionalIterator>
-  void reverse(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-               BidirectionalIterator first,
-               BidirectionalIterator last)
-{
-  using thrust::system::detail::generic::reverse;
-  return reverse(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last);
-} // end reverse()
-
-
-template<typename DerivedPolicy, typename BidirectionalIterator, typename OutputIterator>
-  OutputIterator reverse_copy(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                              BidirectionalIterator first,
-                              BidirectionalIterator last,
-                              OutputIterator result)
-{
-  using thrust::system::detail::generic::reverse_copy;
-  return reverse_copy(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result);
-} // end reverse_copy()
-
-
-template<typename BidirectionalIterator>
-  void reverse(BidirectionalIterator first,
-               BidirectionalIterator last)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<BidirectionalIterator>::type System;
-
-  System system;
-
-  return thrust::reverse(select_system(system), first, last);
-} // end reverse()
-
-
-template<typename BidirectionalIterator,
-         typename OutputIterator>
-  OutputIterator reverse_copy(BidirectionalIterator first,
-                              BidirectionalIterator last,
-                              OutputIterator result)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<BidirectionalIterator>::type System1;
-  typedef typename thrust::iterator_system<OutputIterator>::type        System2;
-
-  System1 system1;
-  System2 system2;
-
-  return thrust::reverse_copy(select_system(system1,system2), first, last, result);
-} // end reverse_copy()
-
-
-} // end thrust
-
diff --git a/compat/thrust/detail/scan.inl b/compat/thrust/detail/scan.inl
deleted file mode 100644
index 3e5fd9b4f7..0000000000
--- a/compat/thrust/detail/scan.inl
+++ /dev/null
@@ -1,502 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file scan.inl
- *  \brief Inline file for scan.h.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/scan.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/system/detail/generic/select_system.h>
-#include <thrust/system/detail/generic/scan.h>
-#include <thrust/system/detail/generic/scan_by_key.h>
-#include <thrust/system/detail/adl/scan.h>
-#include <thrust/system/detail/adl/scan_by_key.h>
-
-namespace thrust
-{
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator>
-  OutputIterator inclusive_scan(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                InputIterator first,
-                                InputIterator last,
-                                OutputIterator result)
-{
-  using thrust::system::detail::generic::inclusive_scan;
-  return inclusive_scan(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result);
-} // end inclusive_scan() 
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename AssociativeOperator>
-  OutputIterator inclusive_scan(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                InputIterator first,
-                                InputIterator last,
-                                OutputIterator result,
-                                AssociativeOperator binary_op)
-{
-  using thrust::system::detail::generic::inclusive_scan;
-  return inclusive_scan(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, binary_op);
-} // end inclusive_scan()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator>
-  OutputIterator exclusive_scan(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                InputIterator first,
-                                InputIterator last,
-                                OutputIterator result)
-{
-  using thrust::system::detail::generic::exclusive_scan;
-  return exclusive_scan(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result);
-} // end exclusive_scan()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename T>
-  OutputIterator exclusive_scan(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                InputIterator first,
-                                InputIterator last,
-                                OutputIterator result,
-                                T init)
-{
-  using thrust::system::detail::generic::exclusive_scan;
-  return exclusive_scan(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, init);
-} // end exclusive_scan()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename T,
-         typename AssociativeOperator>
-  OutputIterator exclusive_scan(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                InputIterator first,
-                                InputIterator last,
-                                OutputIterator result,
-                                T init,
-                                AssociativeOperator binary_op)
-{
-  using thrust::system::detail::generic::exclusive_scan;
-  return exclusive_scan(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, init, binary_op);
-} // end exclusive_scan()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator>
-  OutputIterator inclusive_scan_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                       InputIterator1 first1,
-                                       InputIterator1 last1,
-                                       InputIterator2 first2,
-                                       OutputIterator result)
-{
-  using thrust::system::detail::generic::inclusive_scan_by_key;
-  return inclusive_scan_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, result);
-} // end inclusive_scan_by_key()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename BinaryPredicate>
-  OutputIterator inclusive_scan_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                       InputIterator1 first1,
-                                       InputIterator1 last1,
-                                       InputIterator2 first2,
-                                       OutputIterator result,
-                                       BinaryPredicate binary_pred)
-{
-  using thrust::system::detail::generic::inclusive_scan_by_key;
-  return inclusive_scan_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, result, binary_pred);
-} // end inclusive_scan_by_key()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename BinaryPredicate,
-         typename AssociativeOperator>
-  OutputIterator inclusive_scan_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                       InputIterator1 first1,
-                                       InputIterator1 last1,
-                                       InputIterator2 first2,
-                                       OutputIterator result,
-                                       BinaryPredicate binary_pred,
-                                       AssociativeOperator binary_op)
-{
-  using thrust::system::detail::generic::inclusive_scan_by_key;
-  return inclusive_scan_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, result, binary_pred, binary_op);
-} // end inclusive_scan_by_key()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator>
-  OutputIterator exclusive_scan_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                       InputIterator1 first1,
-                                       InputIterator1 last1,
-                                       InputIterator2 first2,
-                                       OutputIterator result)
-{
-  using thrust::system::detail::generic::exclusive_scan_by_key;
-  return exclusive_scan_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, result);
-} // end exclusive_scan_by_key()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename T>
-  OutputIterator exclusive_scan_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                       InputIterator1 first1,
-                                       InputIterator1 last1,
-                                       InputIterator2 first2,
-                                       OutputIterator result,
-                                       T init)
-{
-  using thrust::system::detail::generic::exclusive_scan_by_key;
-  return exclusive_scan_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, result, init);
-} // end exclusive_scan_by_key()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename T,
-         typename BinaryPredicate>
-  OutputIterator exclusive_scan_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                       InputIterator1 first1,
-                                       InputIterator1 last1,
-                                       InputIterator2 first2,
-                                       OutputIterator result,
-                                       T init,
-                                       BinaryPredicate binary_pred)
-{
-  using thrust::system::detail::generic::exclusive_scan_by_key;
-  return exclusive_scan_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, result, init, binary_pred);
-} // end exclusive_scan_by_key()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename T,
-         typename BinaryPredicate,
-         typename AssociativeOperator>
-  OutputIterator exclusive_scan_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                       InputIterator1 first1,
-                                       InputIterator1 last1,
-                                       InputIterator2 first2,
-                                       OutputIterator result,
-                                       T init,
-                                       BinaryPredicate binary_pred,
-                                       AssociativeOperator binary_op)
-{
-  using thrust::system::detail::generic::exclusive_scan_by_key;
-  return exclusive_scan_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, result, init, binary_pred, binary_op);
-} // end exclusive_scan_by_key()
-
-
-template<typename InputIterator,
-         typename OutputIterator>
-  OutputIterator inclusive_scan(InputIterator first,
-                                InputIterator last,
-                                OutputIterator result)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator>::type  System1;
-  typedef typename thrust::iterator_system<OutputIterator>::type System2;
-
-  System1 system1;
-  System2 system2;
-
-  return thrust::inclusive_scan(select_system(system1,system2), first, last, result);
-} // end inclusive_scan()
-
-
-template<typename InputIterator,
-         typename OutputIterator,
-         typename BinaryFunction>
-  OutputIterator inclusive_scan(InputIterator first,
-                                InputIterator last,
-                                OutputIterator result,
-                                BinaryFunction binary_op)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator>::type  System1;
-  typedef typename thrust::iterator_system<OutputIterator>::type System2;
-
-  System1 system1;
-  System2 system2;
-
-  return thrust::inclusive_scan(select_system(system1,system2), first, last, result, binary_op);
-} // end inclusive_scan()
-
-
-template<typename InputIterator,
-         typename OutputIterator>
-  OutputIterator exclusive_scan(InputIterator first,
-                                InputIterator last,
-                                OutputIterator result)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator>::type  System1;
-  typedef typename thrust::iterator_system<OutputIterator>::type System2;
-
-  System1 system1;
-  System2 system2;
-
-  return thrust::exclusive_scan(select_system(system1,system2), first, last, result);
-} // end exclusive_scan()
-
-
-template<typename InputIterator,
-         typename OutputIterator,
-         typename T>
-  OutputIterator exclusive_scan(InputIterator first,
-                                InputIterator last,
-                                OutputIterator result,
-                                T init)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator>::type  System1;
-  typedef typename thrust::iterator_system<OutputIterator>::type System2;
-
-  System1 system1;
-  System2 system2;
-
-  return thrust::exclusive_scan(select_system(system1,system2), first, last, result, init);
-} // end exclusive_scan()
-
-
-template<typename InputIterator,
-         typename OutputIterator,
-         typename T,
-         typename BinaryFunction>
-  OutputIterator exclusive_scan(InputIterator first,
-                                InputIterator last,
-                                OutputIterator result,
-                                T init,
-                                BinaryFunction binary_op)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator>::type  System1;
-  typedef typename thrust::iterator_system<OutputIterator>::type System2;
-
-  System1 system1;
-  System2 system2;
-
-  return thrust::exclusive_scan(select_system(system1,system2), first, last, result, init, binary_op);
-} // end exclusive_scan()
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator>
-  OutputIterator inclusive_scan_by_key(InputIterator1 first1,
-                                       InputIterator1 last1,
-                                       InputIterator2 first2,
-                                       OutputIterator result)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type System1;
-  typedef typename thrust::iterator_system<InputIterator2>::type System2;
-  typedef typename thrust::iterator_system<OutputIterator>::type System3;
-
-  System1 system1;
-  System2 system2;
-  System3 system3;
-
-  return thrust::inclusive_scan_by_key(select_system(system1,system2,system3), first1, last1, first2, result);
-}
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename BinaryPredicate>
-  OutputIterator inclusive_scan_by_key(InputIterator1 first1,
-                                       InputIterator1 last1,
-                                       InputIterator2 first2,
-                                       OutputIterator result,
-                                       BinaryPredicate binary_pred)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type System1;
-  typedef typename thrust::iterator_system<InputIterator2>::type System2;
-  typedef typename thrust::iterator_system<OutputIterator>::type System3;
-
-  System1 system1;
-  System2 system2;
-  System3 system3;
-
-  return thrust::inclusive_scan_by_key(select_system(system1,system2,system3), first1, last1, first2, result, binary_pred);
-}
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename BinaryPredicate,
-         typename AssociativeOperator>
-  OutputIterator inclusive_scan_by_key(InputIterator1 first1,
-                                       InputIterator1 last1,
-                                       InputIterator2 first2,
-                                       OutputIterator result,
-                                       BinaryPredicate binary_pred,
-                                       AssociativeOperator binary_op)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type System1;
-  typedef typename thrust::iterator_system<InputIterator2>::type System2;
-  typedef typename thrust::iterator_system<OutputIterator>::type System3;
-
-  System1 system1;
-  System2 system2;
-  System3 system3;
-
-  return thrust::inclusive_scan_by_key(select_system(system1,system2,system3), first1, last1, first2, result, binary_pred, binary_op);
-}
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator>
-  OutputIterator exclusive_scan_by_key(InputIterator1 first1,
-                                       InputIterator1 last1,
-                                       InputIterator2 first2,
-                                       OutputIterator result)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type System1;
-  typedef typename thrust::iterator_system<InputIterator2>::type System2;
-  typedef typename thrust::iterator_system<OutputIterator>::type System3;
-
-  System1 system1;
-  System2 system2;
-  System3 system3;
-
-  return thrust::exclusive_scan_by_key(select_system(system1,system2,system3), first1, last1, first2, result);
-}
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename T>
-  OutputIterator exclusive_scan_by_key(InputIterator1 first1,
-                                       InputIterator1 last1,
-                                       InputIterator2 first2,
-                                       OutputIterator result,
-                                       T init)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type System1;
-  typedef typename thrust::iterator_system<InputIterator2>::type System2;
-  typedef typename thrust::iterator_system<OutputIterator>::type System3;
-
-  System1 system1;
-  System2 system2;
-  System3 system3;
-
-  return thrust::exclusive_scan_by_key(select_system(system1,system2,system3), first1, last1, first2, result, init);
-}
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename T,
-         typename BinaryPredicate>
-  OutputIterator exclusive_scan_by_key(InputIterator1 first1,
-                                       InputIterator1 last1,
-                                       InputIterator2 first2,
-                                       OutputIterator result,
-                                       T init,
-                                       BinaryPredicate binary_pred)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type System1;
-  typedef typename thrust::iterator_system<InputIterator2>::type System2;
-  typedef typename thrust::iterator_system<OutputIterator>::type System3;
-
-  System1 system1;
-  System2 system2;
-  System3 system3;
-
-  return thrust::exclusive_scan_by_key(select_system(system1,system2,system3), first1, last1, first2, result, init, binary_pred);
-}
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename T,
-         typename BinaryPredicate,
-         typename AssociativeOperator>
-  OutputIterator exclusive_scan_by_key(InputIterator1 first1,
-                                       InputIterator1 last1,
-                                       InputIterator2 first2,
-                                       OutputIterator result,
-                                       T init,
-                                       BinaryPredicate binary_pred,
-                                       AssociativeOperator binary_op)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type System1;
-  typedef typename thrust::iterator_system<InputIterator2>::type System2;
-  typedef typename thrust::iterator_system<OutputIterator>::type System3;
-
-  System1 system1;
-  System2 system2;
-  System3 system3;
-
-  return thrust::exclusive_scan_by_key(select_system(system1,system2,system3), first1, last1, first2, result, init, binary_pred, binary_op);
-}
-
-
-} // end namespace thrust
-
diff --git a/compat/thrust/detail/scatter.inl b/compat/thrust/detail/scatter.inl
deleted file mode 100644
index 934addb727..0000000000
--- a/compat/thrust/detail/scatter.inl
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file scatter.inl
- *  \brief Inline file for scatter.h.
- */
-
-#include <thrust/scatter.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/system/detail/generic/select_system.h>
-#include <thrust/system/detail/generic/scatter.h>
-#include <thrust/system/detail/adl/scatter.h>
-
-namespace thrust
-{
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename RandomAccessIterator>
-  void scatter(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-               InputIterator1 first,
-               InputIterator1 last,
-               InputIterator2 map,
-               RandomAccessIterator output)
-{
-  using thrust::system::detail::generic::scatter;
-  return scatter(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, map, output);
-} // end scatter()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename RandomAccessIterator>
-  void scatter_if(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                  InputIterator1 first,
-                  InputIterator1 last,
-                  InputIterator2 map,
-                  InputIterator3 stencil,
-                  RandomAccessIterator output)
-{
-  using thrust::system::detail::generic::scatter_if;
-  return scatter_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, map, stencil, output);
-} // end scatter_if()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename RandomAccessIterator,
-         typename Predicate>
-  void scatter_if(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                  InputIterator1 first,
-                  InputIterator1 last,
-                  InputIterator2 map,
-                  InputIterator3 stencil,
-                  RandomAccessIterator output,
-                  Predicate pred)
-{
-  using thrust::system::detail::generic::scatter_if;
-  return scatter_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, map, stencil, output, pred);
-} // end scatter_if()
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename RandomAccessIterator>
-  void scatter(InputIterator1 first,
-               InputIterator1 last,
-               InputIterator2 map,
-               RandomAccessIterator output)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type       System1; 
-  typedef typename thrust::iterator_system<InputIterator2>::type       System2; 
-  typedef typename thrust::iterator_system<RandomAccessIterator>::type System3; 
-
-  System1 system1;
-  System2 system2;
-  System3 system3;
-
-  return thrust::scatter(select_system(system1,system2,system3), first, last, map, output);
-} // end scatter()
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename RandomAccessIterator>
-  void scatter_if(InputIterator1 first,
-                  InputIterator1 last,
-                  InputIterator2 map,
-                  InputIterator3 stencil,
-                  RandomAccessIterator output)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type       System1; 
-  typedef typename thrust::iterator_system<InputIterator2>::type       System2; 
-  typedef typename thrust::iterator_system<InputIterator3>::type       System3; 
-  typedef typename thrust::iterator_system<RandomAccessIterator>::type System4; 
-
-  System1 system1;
-  System2 system2;
-  System3 system3;
-  System4 system4;
-
-  return thrust::scatter_if(select_system(system1,system2,system3,system4), first, last, map, stencil, output);
-} // end scatter_if()
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename RandomAccessIterator,
-         typename Predicate>
-  void scatter_if(InputIterator1 first,
-                  InputIterator1 last,
-                  InputIterator2 map,
-                  InputIterator3 stencil,
-                  RandomAccessIterator output,
-                  Predicate pred)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type       System1; 
-  typedef typename thrust::iterator_system<InputIterator2>::type       System2; 
-  typedef typename thrust::iterator_system<InputIterator3>::type       System3; 
-  typedef typename thrust::iterator_system<RandomAccessIterator>::type System4; 
-
-  System1 system1;
-  System2 system2;
-  System3 system3;
-  System4 system4;
-
-  return thrust::scatter_if(select_system(system1,system2,system3,system4), first, last, map, stencil, output, pred);
-} // end scatter_if()
-
-} // end namespace thrust
-
diff --git a/compat/thrust/detail/sequence.inl b/compat/thrust/detail/sequence.inl
deleted file mode 100644
index f1741877f8..0000000000
--- a/compat/thrust/detail/sequence.inl
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file sequence.inl
- *  \brief Inline file for sequence.h.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/sequence.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/system/detail/generic/select_system.h>
-#include <thrust/system/detail/generic/sequence.h>
-#include <thrust/system/detail/adl/sequence.h>
-
-namespace thrust
-{
-
-
-template<typename DerivedPolicy, typename ForwardIterator>
-  void sequence(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                ForwardIterator first,
-                ForwardIterator last)
-{
-  using thrust::system::detail::generic::sequence;
-  return sequence(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last);
-} // end sequence()
-
-
-template<typename DerivedPolicy, typename ForwardIterator, typename T>
-  void sequence(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                ForwardIterator first,
-                ForwardIterator last,
-                T init)
-{
-  using thrust::system::detail::generic::sequence;
-  return sequence(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, init);
-} // end sequence()
-
-
-template<typename DerivedPolicy, typename ForwardIterator, typename T>
-  void sequence(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                ForwardIterator first,
-                ForwardIterator last,
-                T init,
-                T step)
-{
-  using thrust::system::detail::generic::sequence;
-  return sequence(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, init, step);
-} // end sequence()
-
-
-template<typename ForwardIterator>
-  void sequence(ForwardIterator first,
-                ForwardIterator last)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<ForwardIterator>::type System;
-
-  System system;
-
-  return thrust::sequence(select_system(system), first, last);
-} // end sequence()
-
-
-template<typename ForwardIterator, typename T>
-  void sequence(ForwardIterator first,
-                ForwardIterator last,
-                T init)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<ForwardIterator>::type System;
-
-  System system;
-
-  return thrust::sequence(select_system(system), first, last, init);
-} // end sequence()
-
-
-template<typename ForwardIterator, typename T>
-  void sequence(ForwardIterator first,
-                ForwardIterator last,
-                T init,
-                T step)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<ForwardIterator>::type System;
-
-  System system;
-
-  return thrust::sequence(select_system(system), first, last, init, step);
-} // end sequence()
-
-
-} // end namespace thrust
-
diff --git a/compat/thrust/detail/set_operations.inl b/compat/thrust/detail/set_operations.inl
deleted file mode 100644
index daec46156d..0000000000
--- a/compat/thrust/detail/set_operations.inl
+++ /dev/null
@@ -1,836 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-/*! \file set_operations.inl
- *  \brief Inline file for set_operations.h.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/system/detail/generic/select_system.h>
-#include <thrust/system/detail/generic/set_operations.h>
-#include <thrust/system/detail/adl/set_operations.h>
-
-namespace thrust
-{
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator>
-  OutputIterator set_difference(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                InputIterator1                                              first1,
-                                InputIterator1                                              last1,
-                                InputIterator2                                              first2,
-                                InputIterator2                                              last2,
-                                OutputIterator                                              result)
-{
-  using thrust::system::detail::generic::set_difference;
-  return set_difference(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, last2, result);
-} // end set_difference()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename StrictWeakCompare>
-  OutputIterator set_difference(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                InputIterator1                                              first1,
-                                InputIterator1                                              last1,
-                                InputIterator2                                              first2,
-                                InputIterator2                                              last2,
-                                OutputIterator                                              result,
-                                StrictWeakCompare                                           comp)
-{
-  using thrust::system::detail::generic::set_difference;
-  return set_difference(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, last2, result, comp);
-} // end set_difference()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename InputIterator4,
-         typename OutputIterator1,
-         typename OutputIterator2>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    set_difference_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                          InputIterator1                                              keys_first1,
-                          InputIterator1                                              keys_last1,
-                          InputIterator2                                              keys_first2,
-                          InputIterator2                                              keys_last2,
-                          InputIterator3                                              values_first1,
-                          InputIterator4                                              values_first2,
-                          OutputIterator1                                             keys_result,
-                          OutputIterator2                                             values_result)
-{
-  using thrust::system::detail::generic::set_difference_by_key;
-  return set_difference_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result);
-} // end set_difference_by_key()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename InputIterator4,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename StrictWeakCompare>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    set_difference_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                          InputIterator1                                              keys_first1,
-                          InputIterator1                                              keys_last1,
-                          InputIterator2                                              keys_first2,
-                          InputIterator2                                              keys_last2,
-                          InputIterator3                                              values_first1,
-                          InputIterator4                                              values_first2,
-                          OutputIterator1                                             keys_result,
-                          OutputIterator2                                             values_result,
-                          StrictWeakCompare                                           comp)
-{
-  using thrust::system::detail::generic::set_difference_by_key;
-  return set_difference_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result, comp);
-} // end set_difference_by_key()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator>
-  OutputIterator set_intersection(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                  InputIterator1                                              first1,
-                                  InputIterator1                                              last1,
-                                  InputIterator2                                              first2,
-                                  InputIterator2                                              last2,
-                                  OutputIterator                                              result)
-{
-  using thrust::system::detail::generic::set_intersection;
-  return set_intersection(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, last2, result);
-} // end set_intersection()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename StrictWeakCompare>
-  OutputIterator set_intersection(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                  InputIterator1                                              first1,
-                                  InputIterator1                                              last1,
-                                  InputIterator2                                              first2,
-                                  InputIterator2                                              last2,
-                                  OutputIterator                                              result,
-                                  StrictWeakCompare                                           comp)
-{
-  using thrust::system::detail::generic::set_intersection;
-  return set_intersection(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, last2, result, comp);
-} // end set_intersection()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename OutputIterator1,
-         typename OutputIterator2>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    set_intersection_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                            InputIterator1                                              keys_first1,
-                            InputIterator1                                              keys_last1,
-                            InputIterator2                                              keys_first2,
-                            InputIterator2                                              keys_last2,
-                            InputIterator3                                              values_first1,
-                            OutputIterator1                                             keys_result,
-                            OutputIterator2                                             values_result)
-{
-  using thrust::system::detail::generic::set_intersection_by_key;
-  return set_intersection_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, keys_result, values_result);
-} // end set_intersection_by_key()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename StrictWeakCompare>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    set_intersection_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                            InputIterator1                                              keys_first1,
-                            InputIterator1                                              keys_last1,
-                            InputIterator2                                              keys_first2,
-                            InputIterator2                                              keys_last2,
-                            InputIterator3                                              values_first1,
-                            OutputIterator1                                             keys_result,
-                            OutputIterator2                                             values_result,
-                            StrictWeakCompare                                           comp)
-{
-  using thrust::system::detail::generic::set_intersection_by_key;
-  return set_intersection_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, keys_result, values_result, comp);
-} // end set_intersection_by_key()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator>
-  OutputIterator set_symmetric_difference(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                          InputIterator1                                              first1,
-                                          InputIterator1                                              last1,
-                                          InputIterator2                                              first2,
-                                          InputIterator2                                              last2,
-                                          OutputIterator                                              result)
-{
-  using thrust::system::detail::generic::set_symmetric_difference;
-  return set_symmetric_difference(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, last2, result);
-} // end set_symmetric_difference()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename StrictWeakCompare>
-  OutputIterator set_symmetric_difference(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                          InputIterator1                                              first1,
-                                          InputIterator1                                              last1,
-                                          InputIterator2                                              first2,
-                                          InputIterator2                                              last2,
-                                          OutputIterator                                              result,
-                                          StrictWeakCompare                                           comp)
-{
-  using thrust::system::detail::generic::set_symmetric_difference;
-  return set_symmetric_difference(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, last2, result, comp);
-} // end set_symmetric_difference()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename InputIterator4,
-         typename OutputIterator1,
-         typename OutputIterator2>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    set_symmetric_difference_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                    InputIterator1                                              keys_first1,
-                                    InputIterator1                                              keys_last1,
-                                    InputIterator2                                              keys_first2,
-                                    InputIterator2                                              keys_last2,
-                                    InputIterator3                                              values_first1,
-                                    InputIterator4                                              values_first2,
-                                    OutputIterator1                                             keys_result,
-                                    OutputIterator2                                             values_result)
-{
-  using thrust::system::detail::generic::set_symmetric_difference_by_key;
-  return set_symmetric_difference_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result);
-} // end set_symmetric_difference_by_key()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename InputIterator4,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename StrictWeakCompare>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    set_symmetric_difference_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                    InputIterator1                                              keys_first1,
-                                    InputIterator1                                              keys_last1,
-                                    InputIterator2                                              keys_first2,
-                                    InputIterator2                                              keys_last2,
-                                    InputIterator3                                              values_first1,
-                                    InputIterator4                                              values_first2,
-                                    OutputIterator1                                             keys_result,
-                                    OutputIterator2                                             values_result,
-                                    StrictWeakCompare                                           comp)
-{
-  using thrust::system::detail::generic::set_symmetric_difference_by_key;
-  return set_symmetric_difference_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result, comp);
-} // end set_symmetric_difference_by_key()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator>
-  OutputIterator set_union(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                           InputIterator1                                              first1,
-                           InputIterator1                                              last1,
-                           InputIterator2                                              first2,
-                           InputIterator2                                              last2,
-                           OutputIterator                                              result)
-{
-  using thrust::system::detail::generic::set_union;
-  return set_union(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, last2, result);
-} // end set_union()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename StrictWeakCompare>
-  OutputIterator set_union(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                           InputIterator1                                              first1,
-                           InputIterator1                                              last1,
-                           InputIterator2                                              first2,
-                           InputIterator2                                              last2,
-                           OutputIterator                                              result,
-                           StrictWeakCompare                                           comp)
-{
-  using thrust::system::detail::generic::set_union;
-  return set_union(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, last2, result, comp);
-} // end set_union()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename InputIterator4,
-         typename OutputIterator1,
-         typename OutputIterator2>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    set_union_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                     InputIterator1                                              keys_first1,
-                     InputIterator1                                              keys_last1,
-                     InputIterator2                                              keys_first2,
-                     InputIterator2                                              keys_last2,
-                     InputIterator3                                              values_first1,
-                     InputIterator4                                              values_first2,
-                     OutputIterator1                                             keys_result,
-                     OutputIterator2                                             values_result)
-{
-  using thrust::system::detail::generic::set_union_by_key;
-  return set_union_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result);
-} // end set_union_by_key()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename InputIterator4,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename StrictWeakCompare>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    set_union_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                     InputIterator1                                              keys_first1,
-                     InputIterator1                                              keys_last1,
-                     InputIterator2                                              keys_first2,
-                     InputIterator2                                              keys_last2,
-                     InputIterator3                                              values_first1,
-                     InputIterator4                                              values_first2,
-                     OutputIterator1                                             keys_result,
-                     OutputIterator2                                             values_result,
-                     StrictWeakCompare                                           comp)
-{
-  using thrust::system::detail::generic::set_union_by_key;
-  return set_union_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result, comp);
-} // end set_union_by_key()
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename StrictWeakOrdering>
-  OutputIterator set_difference(InputIterator1 first1,
-                                InputIterator1 last1,
-                                InputIterator2 first2,
-                                InputIterator2 last2,
-                                OutputIterator result,
-                                StrictWeakOrdering comp)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type System1;
-  typedef typename thrust::iterator_system<InputIterator2>::type System2;
-  typedef typename thrust::iterator_system<OutputIterator>::type System3;
-
-  System1 system1;
-  System2 system2;
-  System3 system3;
-
-  return thrust::set_difference(select_system(system1,system2,system3), first1, last1, first2, last2, result, comp);
-} // end set_difference()
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator>
-  OutputIterator set_difference(InputIterator1 first1,
-                                InputIterator1 last1,
-                                InputIterator2 first2,
-                                InputIterator2 last2,
-                                OutputIterator result)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type System1;
-  typedef typename thrust::iterator_system<InputIterator2>::type System2;
-  typedef typename thrust::iterator_system<OutputIterator>::type System3;
-
-  System1 system1;
-  System2 system2;
-  System3 system3;
-
-  return thrust::set_difference(select_system(system1,system2,system3), first1, last1, first2, last2, result);
-} // end set_difference()
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename InputIterator4,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename StrictWeakOrdering>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    set_difference_by_key(InputIterator1 keys_first1,
-                          InputIterator1 keys_last1,
-                          InputIterator2 keys_first2,
-                          InputIterator2 keys_last2,
-                          InputIterator3 values_first1,
-                          InputIterator4 values_first2,
-                          OutputIterator1 keys_result,
-                          OutputIterator2 values_result,
-                          StrictWeakOrdering comp)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type  System1;
-  typedef typename thrust::iterator_system<InputIterator2>::type  System2;
-  typedef typename thrust::iterator_system<InputIterator3>::type  System3;
-  typedef typename thrust::iterator_system<InputIterator4>::type  System4;
-  typedef typename thrust::iterator_system<OutputIterator1>::type System5;
-  typedef typename thrust::iterator_system<OutputIterator2>::type System6;
-
-  System1 system1;
-  System2 system2;
-  System3 system3;
-  System4 system4;
-  System5 system5;
-  System6 system6;
-
-  return thrust::set_difference_by_key(select_system(system1,system2,system3,system4,system5,system6), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result, comp);
-} // end set_difference_by_key()
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename InputIterator4,
-         typename OutputIterator1,
-         typename OutputIterator2>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    set_difference_by_key(InputIterator1 keys_first1,
-                          InputIterator1 keys_last1,
-                          InputIterator2 keys_first2,
-                          InputIterator2 keys_last2,
-                          InputIterator3 values_first1,
-                          InputIterator4 values_first2,
-                          OutputIterator1 keys_result,
-                          OutputIterator2 values_result)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type  System1;
-  typedef typename thrust::iterator_system<InputIterator2>::type  System2;
-  typedef typename thrust::iterator_system<InputIterator3>::type  System3;
-  typedef typename thrust::iterator_system<InputIterator4>::type  System4;
-  typedef typename thrust::iterator_system<OutputIterator1>::type System5;
-  typedef typename thrust::iterator_system<OutputIterator2>::type System6;
-
-  System1 system1;
-  System2 system2;
-  System3 system3;
-  System4 system4;
-  System5 system5;
-  System6 system6;
-
-  return thrust::set_difference_by_key(select_system(system1,system2,system3,system4,system5,system6), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result);
-} // end set_difference_by_key()
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename StrictWeakOrdering>
-  OutputIterator set_intersection(InputIterator1 first1,
-                                  InputIterator1 last1,
-                                  InputIterator2 first2,
-                                  InputIterator2 last2,
-                                  OutputIterator result,
-                                  StrictWeakOrdering comp)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type System1;
-  typedef typename thrust::iterator_system<InputIterator2>::type System2;
-  typedef typename thrust::iterator_system<OutputIterator>::type System3;
-
-  System1 system1;
-  System2 system2;
-  System3 system3;
-
-  return thrust::set_intersection(select_system(system1,system2,system3), first1, last1, first2, last2, result, comp);
-} // end set_intersection()
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator>
-  OutputIterator set_intersection(InputIterator1 first1,
-                                  InputIterator1 last1,
-                                  InputIterator2 first2,
-                                  InputIterator2 last2,
-                                  OutputIterator result)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type System1;
-  typedef typename thrust::iterator_system<InputIterator2>::type System2;
-  typedef typename thrust::iterator_system<OutputIterator>::type System3;
-
-  System1 system1;
-  System2 system2;
-  System3 system3;
-
-  return thrust::set_intersection(select_system(system1,system2,system3), first1, last1, first2, last2, result);
-} // end set_intersection()
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename StrictWeakOrdering>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    set_intersection_by_key(InputIterator1 keys_first1,
-                            InputIterator1 keys_last1,
-                            InputIterator2 keys_first2,
-                            InputIterator2 keys_last2,
-                            InputIterator3 values_first1,
-                            OutputIterator1 keys_result,
-                            OutputIterator2 values_result,
-                            StrictWeakOrdering comp)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type  System1;
-  typedef typename thrust::iterator_system<InputIterator2>::type  System2;
-  typedef typename thrust::iterator_system<InputIterator3>::type  System3;
-  typedef typename thrust::iterator_system<OutputIterator1>::type System4;
-  typedef typename thrust::iterator_system<OutputIterator2>::type System5;
-
-  System1 system1;
-  System2 system2;
-  System3 system3;
-  System4 system4;
-  System5 system5;
-
-  return thrust::set_intersection_by_key(select_system(system1,system2,system3,system4,system5), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, keys_result, values_result, comp);
-} // end set_intersection_by_key()
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename OutputIterator1,
-         typename OutputIterator2>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    set_intersection_by_key(InputIterator1 keys_first1,
-                            InputIterator1 keys_last1,
-                            InputIterator2 keys_first2,
-                            InputIterator2 keys_last2,
-                            InputIterator3 values_first1,
-                            OutputIterator1 keys_result,
-                            OutputIterator2 values_result)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type  System1;
-  typedef typename thrust::iterator_system<InputIterator2>::type  System2;
-  typedef typename thrust::iterator_system<InputIterator3>::type  System3;
-  typedef typename thrust::iterator_system<OutputIterator1>::type System4;
-  typedef typename thrust::iterator_system<OutputIterator2>::type System5;
-
-  System1 system1;
-  System2 system2;
-  System3 system3;
-  System4 system4;
-  System5 system5;
-
-  return thrust::set_intersection_by_key(select_system(system1,system2,system3,system4,system5), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, keys_result, values_result);
-} // end set_intersection_by_key()
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename StrictWeakOrdering>
-  OutputIterator set_symmetric_difference(InputIterator1 first1,
-                                          InputIterator1 last1,
-                                          InputIterator2 first2,
-                                          InputIterator2 last2,
-                                          OutputIterator result,
-                                          StrictWeakOrdering comp)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type System1;
-  typedef typename thrust::iterator_system<InputIterator2>::type System2;
-  typedef typename thrust::iterator_system<OutputIterator>::type System3;
-
-  System1 system1;
-  System2 system2;
-  System3 system3;
-
-  return thrust::set_symmetric_difference(select_system(system1,system2,system3), first1, last1, first2, last2, result, comp);
-} // end set_symmetric_difference()
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator>
-  OutputIterator set_symmetric_difference(InputIterator1 first1,
-                                          InputIterator1 last1,
-                                          InputIterator2 first2,
-                                          InputIterator2 last2,
-                                          OutputIterator result)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type System1;
-  typedef typename thrust::iterator_system<InputIterator2>::type System2;
-  typedef typename thrust::iterator_system<OutputIterator>::type System3;
-
-  System1 system1;
-  System2 system2;
-  System3 system3;
-
-  return thrust::set_symmetric_difference(select_system(system1,system2,system3), first1, last1, first2, last2, result);
-} // end set_symmetric_difference()
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename InputIterator4,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename StrictWeakOrdering>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    set_symmetric_difference_by_key(InputIterator1 keys_first1,
-                                    InputIterator1 keys_last1,
-                                    InputIterator2 keys_first2,
-                                    InputIterator2 keys_last2,
-                                    InputIterator3 values_first1,
-                                    InputIterator4 values_first2,
-                                    OutputIterator1 keys_result,
-                                    OutputIterator2 values_result,
-                                    StrictWeakOrdering comp)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type  System1;
-  typedef typename thrust::iterator_system<InputIterator2>::type  System2;
-  typedef typename thrust::iterator_system<InputIterator3>::type  System3;
-  typedef typename thrust::iterator_system<InputIterator4>::type  System4;
-  typedef typename thrust::iterator_system<OutputIterator1>::type System5;
-  typedef typename thrust::iterator_system<OutputIterator2>::type System6;
-
-  System1 system1;
-  System2 system2;
-  System3 system3;
-  System4 system4;
-  System5 system5;
-  System6 system6;
-
-  return thrust::set_symmetric_difference_by_key(select_system(system1,system2,system3,system4,system5,system6), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result, comp);
-} // end set_symmetric_difference_by_key()
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename InputIterator4,
-         typename OutputIterator1,
-         typename OutputIterator2>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    set_symmetric_difference_by_key(InputIterator1 keys_first1,
-                                    InputIterator1 keys_last1,
-                                    InputIterator2 keys_first2,
-                                    InputIterator2 keys_last2,
-                                    InputIterator3 values_first1,
-                                    InputIterator4 values_first2,
-                                    OutputIterator1 keys_result,
-                                    OutputIterator2 values_result)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type  System1;
-  typedef typename thrust::iterator_system<InputIterator2>::type  System2;
-  typedef typename thrust::iterator_system<InputIterator3>::type  System3;
-  typedef typename thrust::iterator_system<InputIterator4>::type  System4;
-  typedef typename thrust::iterator_system<OutputIterator1>::type System5;
-  typedef typename thrust::iterator_system<OutputIterator2>::type System6;
-
-  System1 system1;
-  System2 system2;
-  System3 system3;
-  System4 system4;
-  System5 system5;
-  System6 system6;
-
-  return thrust::set_symmetric_difference_by_key(select_system(system1,system2,system3,system4,system5,system6), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result);
-} // end set_symmetric_difference_by_key()
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename StrictWeakOrdering>
-  OutputIterator set_union(InputIterator1 first1,
-                           InputIterator1 last1,
-                           InputIterator2 first2,
-                           InputIterator2 last2,
-                           OutputIterator result,
-                           StrictWeakOrdering comp)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type System1;
-  typedef typename thrust::iterator_system<InputIterator2>::type System2;
-  typedef typename thrust::iterator_system<OutputIterator>::type System3;
-
-  System1 system1;
-  System2 system2;
-  System3 system3;
-
-  return thrust::set_union(select_system(system1,system2,system3), first1, last1, first2, last2, result, comp);
-} // end set_union()
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator>
-  OutputIterator set_union(InputIterator1 first1,
-                           InputIterator1 last1,
-                           InputIterator2 first2,
-                           InputIterator2 last2,
-                           OutputIterator result)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type System1;
-  typedef typename thrust::iterator_system<InputIterator2>::type System2;
-  typedef typename thrust::iterator_system<OutputIterator>::type System3;
-
-  System1 system1;
-  System2 system2;
-  System3 system3;
-
-  return thrust::set_union(select_system(system1,system2,system3), first1, last1, first2, last2, result);
-} // end set_union()
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename InputIterator4,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename StrictWeakOrdering>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    set_union_by_key(InputIterator1 keys_first1,
-                     InputIterator1 keys_last1,
-                     InputIterator2 keys_first2,
-                     InputIterator2 keys_last2,
-                     InputIterator3 values_first1,
-                     InputIterator4 values_first2,
-                     OutputIterator1 keys_result,
-                     OutputIterator2 values_result,
-                     StrictWeakOrdering comp)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type  System1;
-  typedef typename thrust::iterator_system<InputIterator2>::type  System2;
-  typedef typename thrust::iterator_system<InputIterator3>::type  System3;
-  typedef typename thrust::iterator_system<InputIterator4>::type  System4;
-  typedef typename thrust::iterator_system<OutputIterator1>::type System5;
-  typedef typename thrust::iterator_system<OutputIterator2>::type System6;
-
-  System1 system1;
-  System2 system2;
-  System3 system3;
-  System4 system4;
-  System5 system5;
-  System6 system6;
-
-  return thrust::set_union_by_key(select_system(system1,system2,system3,system4,system5,system6), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result, comp);
-} // end set_union_by_key()
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename InputIterator4,
-         typename OutputIterator1,
-         typename OutputIterator2>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    set_union_by_key(InputIterator1 keys_first1,
-                     InputIterator1 keys_last1,
-                     InputIterator2 keys_first2,
-                     InputIterator2 keys_last2,
-                     InputIterator3 values_first1,
-                     InputIterator4 values_first2,
-                     OutputIterator1 keys_result,
-                     OutputIterator2 values_result)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type  System1;
-  typedef typename thrust::iterator_system<InputIterator2>::type  System2;
-  typedef typename thrust::iterator_system<InputIterator3>::type  System3;
-  typedef typename thrust::iterator_system<InputIterator4>::type  System4;
-  typedef typename thrust::iterator_system<OutputIterator1>::type System5;
-  typedef typename thrust::iterator_system<OutputIterator2>::type System6;
-
-  System1 system1;
-  System2 system2;
-  System3 system3;
-  System4 system4;
-  System5 system5;
-  System6 system6;
-
-  return thrust::set_union_by_key(select_system(system1,system2,system3,system4,system5,system6), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result);
-} // end set_union_by_key()
-
-
-} // end thrust
-
diff --git a/compat/thrust/detail/sort.inl b/compat/thrust/detail/sort.inl
deleted file mode 100644
index 08be55a8ce..0000000000
--- a/compat/thrust/detail/sort.inl
+++ /dev/null
@@ -1,383 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file sort.inl
- *  \brief Inline file for sort.h.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/sort.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/system/detail/generic/select_system.h>
-#include <thrust/system/detail/generic/sort.h>
-#include <thrust/system/detail/adl/sort.h>
-
-namespace thrust
-{
-
-
-template<typename DerivedPolicy, typename RandomAccessIterator>
-  void sort(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-            RandomAccessIterator first,
-            RandomAccessIterator last)
-{
-  using thrust::system::detail::generic::sort;
-  return sort(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last);
-} // end sort()
-
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator,
-         typename StrictWeakOrdering>
-  void sort(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-            RandomAccessIterator first,
-            RandomAccessIterator last,
-            StrictWeakOrdering comp)
-{
-  using thrust::system::detail::generic::sort;
-  return sort(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, comp);
-} // end sort()
-
-
-template<typename DerivedPolicy, typename RandomAccessIterator>
-  void stable_sort(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                   RandomAccessIterator first,
-                   RandomAccessIterator last)
-{
-  using thrust::system::detail::generic::stable_sort;
-  return stable_sort(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last);
-} // end stable_sort()
-
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator,
-         typename StrictWeakOrdering>
-  void stable_sort(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                   RandomAccessIterator first,
-                   RandomAccessIterator last,
-                   StrictWeakOrdering comp)
-{
-  using thrust::system::detail::generic::stable_sort;
-  return stable_sort(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, comp);
-} // end stable_sort()
-
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2>
-  void sort_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                   RandomAccessIterator1 keys_first,
-                   RandomAccessIterator1 keys_last,
-                   RandomAccessIterator2 values_first)
-{
-  using thrust::system::detail::generic::sort_by_key;
-  return sort_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first, keys_last, values_first);
-} // end sort_by_key()
-
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename StrictWeakOrdering>
-  void sort_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                   RandomAccessIterator1 keys_first,
-                   RandomAccessIterator1 keys_last,
-                   RandomAccessIterator2 values_first,
-                   StrictWeakOrdering comp)
-{
-  using thrust::system::detail::generic::sort_by_key;
-  return sort_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first, keys_last, values_first, comp);
-} // end sort_by_key()
-
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2>
-  void stable_sort_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                          RandomAccessIterator1 keys_first,
-                          RandomAccessIterator1 keys_last,
-                          RandomAccessIterator2 values_first)
-{
-  using thrust::system::detail::generic::stable_sort_by_key;
-  return stable_sort_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first, keys_last, values_first);
-} // end stable_sort_by_key()
-
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename StrictWeakOrdering>
-  void stable_sort_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                          RandomAccessIterator1 keys_first,
-                          RandomAccessIterator1 keys_last,
-                          RandomAccessIterator2 values_first,
-                          StrictWeakOrdering comp)
-{
-  using thrust::system::detail::generic::stable_sort_by_key;
-  return stable_sort_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first, keys_last, values_first, comp);
-} // end stable_sort_by_key()
-
-
-template<typename DerivedPolicy, typename ForwardIterator>
-  bool is_sorted(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                 ForwardIterator first,
-                 ForwardIterator last)
-{
-  using thrust::system::detail::generic::is_sorted;
-  return is_sorted(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last);
-} // end is_sorted()
-
-
-template<typename DerivedPolicy, typename ForwardIterator, typename Compare>
-  bool is_sorted(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                 ForwardIterator first,
-                 ForwardIterator last,
-                 Compare comp)
-{
-  using thrust::system::detail::generic::is_sorted;
-  return is_sorted(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, comp);
-} // end is_sorted()
-
-
-template<typename DerivedPolicy, typename ForwardIterator>
-  ForwardIterator is_sorted_until(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                  ForwardIterator first,
-                                  ForwardIterator last)
-{
-  using thrust::system::detail::generic::is_sorted_until;
-  return is_sorted_until(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last);
-} // end is_sorted_until()
-
-
-template<typename DerivedPolicy, typename ForwardIterator, typename Compare>
-  ForwardIterator is_sorted_until(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                  ForwardIterator first,
-                                  ForwardIterator last,
-                                  Compare comp)
-{
-  using thrust::system::detail::generic::is_sorted_until;
-  return is_sorted_until(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, comp);
-} // end is_sorted_until()
-
-
-///////////////
-// Key Sorts //
-///////////////
-
-template<typename RandomAccessIterator>
-  void sort(RandomAccessIterator first,
-            RandomAccessIterator last)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<RandomAccessIterator>::type System;
-
-  System system;
-
-  return thrust::sort(select_system(system), first, last);
-} // end sort()
-
-
-template<typename RandomAccessIterator,
-         typename StrictWeakOrdering>
-  void sort(RandomAccessIterator first,
-            RandomAccessIterator last,
-            StrictWeakOrdering comp)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<RandomAccessIterator>::type System;
-
-  System system;
-
-  return thrust::sort(select_system(system), first, last, comp);
-} // end sort()
-
-
-template<typename RandomAccessIterator>
-  void stable_sort(RandomAccessIterator first,
-                   RandomAccessIterator last)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<RandomAccessIterator>::type System;
-
-  System system;
-
-  return thrust::stable_sort(select_system(system), first, last);
-} // end stable_sort() 
-
-
-template<typename RandomAccessIterator,
-         typename StrictWeakOrdering>
-  void stable_sort(RandomAccessIterator first,
-                   RandomAccessIterator last,
-                   StrictWeakOrdering comp)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<RandomAccessIterator>::type System;
-
-  System system;
-
-  return thrust::stable_sort(select_system(system), first, last, comp);
-} // end stable_sort()
-
-
-
-/////////////////////
-// Key-Value Sorts //
-/////////////////////
-
-template<typename RandomAccessIterator1,
-         typename RandomAccessIterator2>
-  void sort_by_key(RandomAccessIterator1 keys_first,
-                   RandomAccessIterator1 keys_last,
-                   RandomAccessIterator2 values_first)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<RandomAccessIterator1>::type System1;
-  typedef typename thrust::iterator_system<RandomAccessIterator2>::type System2;
-
-  System1 system1;
-  System2 system2;
-
-  return thrust::sort_by_key(select_system(system1,system2), keys_first, keys_last, values_first);
-} // end sort_by_key()
-
-
-template<typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename StrictWeakOrdering>
-  void sort_by_key(RandomAccessIterator1 keys_first,
-                   RandomAccessIterator1 keys_last,
-                   RandomAccessIterator2 values_first,
-                   StrictWeakOrdering comp)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<RandomAccessIterator1>::type System1;
-  typedef typename thrust::iterator_system<RandomAccessIterator2>::type System2;
-
-  System1 system1;
-  System2 system2;
-
-  return thrust::sort_by_key(select_system(system1,system2), keys_first, keys_last, values_first, comp);
-} // end sort_by_key()
-
-
-template<typename RandomAccessIterator1,
-         typename RandomAccessIterator2>
-  void stable_sort_by_key(RandomAccessIterator1 keys_first,
-                          RandomAccessIterator1 keys_last,
-                          RandomAccessIterator2 values_first)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<RandomAccessIterator1>::type System1;
-  typedef typename thrust::iterator_system<RandomAccessIterator2>::type System2;
-
-  System1 system1;
-  System2 system2;
-
-  return thrust::stable_sort_by_key(select_system(system1,system2), keys_first, keys_last, values_first);
-} // end stable_sort_by_key()
-
-
-template<typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename StrictWeakOrdering>
-  void stable_sort_by_key(RandomAccessIterator1 keys_first,
-                          RandomAccessIterator1 keys_last,
-                          RandomAccessIterator2 values_first,
-                          StrictWeakOrdering comp)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<RandomAccessIterator1>::type System1;
-  typedef typename thrust::iterator_system<RandomAccessIterator2>::type System2;
-
-  System1 system1;
-  System2 system2;
-
-  return thrust::stable_sort_by_key(select_system(system1,system2), keys_first, keys_last, values_first, comp);
-} // end stable_sort_by_key()
-
-
-template<typename ForwardIterator>
-  bool is_sorted(ForwardIterator first,
-                 ForwardIterator last)
-{
-  using thrust::system::detail::generic::select_system;
-  
-  typedef typename thrust::iterator_system<ForwardIterator>::type System;
-
-  System system;
-
-  return thrust::is_sorted(select_system(system), first, last);
-} // end is_sorted()
-
-
-template<typename ForwardIterator,
-         typename Compare>
-  bool is_sorted(ForwardIterator first,
-                 ForwardIterator last,
-                 Compare comp)
-{
-  using thrust::system::detail::generic::select_system;
-  
-  typedef typename thrust::iterator_system<ForwardIterator>::type System;
-
-  System system;
-
-  return thrust::is_sorted(select_system(system), first, last, comp);
-} // end is_sorted()
-
-
-template<typename ForwardIterator>
-  ForwardIterator is_sorted_until(ForwardIterator first,
-                                  ForwardIterator last)
-{
-  using thrust::system::detail::generic::select_system;
-  
-  typedef typename thrust::iterator_system<ForwardIterator>::type System;
-
-  System system;
-
-  return thrust::is_sorted_until(select_system(system), first, last);
-} // end is_sorted_until()
-
-
-template<typename ForwardIterator,
-         typename Compare>
-  ForwardIterator is_sorted_until(ForwardIterator first,
-                                  ForwardIterator last,
-                                  Compare comp)
-{
-  using thrust::system::detail::generic::select_system;
-  
-  typedef typename thrust::iterator_system<ForwardIterator>::type System;
-
-  System system;
-
-  return thrust::is_sorted_until(select_system(system), first, last, comp);
-} // end is_sorted_until()
-
-
-} // end namespace thrust
-
diff --git a/compat/thrust/detail/static_assert.h b/compat/thrust/detail/static_assert.h
deleted file mode 100644
index ccc084286c..0000000000
--- a/compat/thrust/detail/static_assert.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/type_traits.h>
-
-/*
- * (C) Copyright John Maddock 2000.
- * 
- * Distributed under the Boost Software License, Version 1.0.
- * (See accompanying NOTICE file for the complete license)
- *
- * For more information, see http://www.boost.org
- */
-
-//
-// Helper macro THRUST_JOIN (based on BOOST_JOIN):
-// The following piece of macro magic joins the two
-// arguments together, even when one of the arguments is
-// itself a macro (see 16.3.1 in C++ standard).  The key
-// is that macro expansion of macro arguments does not
-// occur in THRUST_DO_JOIN2 but does in THRUST_DO_JOIN.
-//
-#define THRUST_JOIN( X, Y ) THRUST_DO_JOIN( X, Y )
-#define THRUST_DO_JOIN( X, Y ) THRUST_DO_JOIN2(X,Y)
-#define THRUST_DO_JOIN2( X, Y ) X##Y
-
-namespace thrust
-{
-
-namespace detail
-{
-
-// HP aCC cannot deal with missing names for template value parameters
-template <bool x> struct STATIC_ASSERTION_FAILURE;
-
-template <> struct STATIC_ASSERTION_FAILURE<true> { enum { value = 1 }; };
-
-// HP aCC cannot deal with missing names for template value parameters
-template<int x> struct static_assert_test{};
-
-template<typename, bool x>
-  struct depend_on_instantiation
-{
-  static const bool value = x;
-};
-
-} // end detail
-
-} // end thrust
-
-#define THRUST_STATIC_ASSERT( B ) \
-   typedef ::thrust::detail::static_assert_test<\
-      sizeof(::thrust::detail::STATIC_ASSERTION_FAILURE< (bool)( B ) >)>\
-         THRUST_JOIN(thrust_static_assert_typedef_, __LINE__)
-
diff --git a/compat/thrust/detail/swap.h b/compat/thrust/detail/swap.h
deleted file mode 100644
index 9f82ac247a..0000000000
--- a/compat/thrust/detail/swap.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-namespace thrust
-{
-
-template<typename Assignable1, typename Assignable2>
-__host__ __device__
-inline void swap(Assignable1 &a, Assignable2 &b)
-{
-  Assignable1 temp = a;
-  a = b;
-  b = temp;
-} // end swap()
-
-} // end namespace thrust
-
diff --git a/compat/thrust/detail/swap.inl b/compat/thrust/detail/swap.inl
deleted file mode 100644
index eafd70ae6a..0000000000
--- a/compat/thrust/detail/swap.inl
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#include <thrust/swap.h>
-#include <thrust/detail/swap.h>
-#include <thrust/detail/swap_ranges.inl>
-
diff --git a/compat/thrust/detail/swap_ranges.inl b/compat/thrust/detail/swap_ranges.inl
deleted file mode 100644
index e3b06deb01..0000000000
--- a/compat/thrust/detail/swap_ranges.inl
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file swap_ranges.inl
- *  \brief Inline file for swap_ranges.h.
- */
-
-#include <thrust/swap.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/system/detail/generic/select_system.h>
-#include <thrust/system/detail/generic/swap_ranges.h>
-#include <thrust/system/detail/adl/swap_ranges.h>
-
-namespace thrust
-{
-
-
-template<typename DerivedPolicy,
-         typename ForwardIterator1,
-         typename ForwardIterator2>
-  ForwardIterator2 swap_ranges(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                               ForwardIterator1 first1,
-                               ForwardIterator1 last1,
-                               ForwardIterator2 first2)
-{
-  using thrust::system::detail::generic::swap_ranges;
-  return swap_ranges(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2);
-} // end swap_ranges()
-
-
-template<typename ForwardIterator1,
-         typename ForwardIterator2>
-  ForwardIterator2 swap_ranges(ForwardIterator1 first1,
-                               ForwardIterator1 last1,
-                               ForwardIterator2 first2)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<ForwardIterator1>::type System1;
-  typedef typename thrust::iterator_system<ForwardIterator2>::type System2;
-
-  System1 system1;
-  System2 system2;
-
-  return thrust::swap_ranges(select_system(system1,system2), first1, last1, first2);
-} // end swap_ranges()
-
-
-} // end namespace thrust
-
diff --git a/compat/thrust/detail/tabulate.inl b/compat/thrust/detail/tabulate.inl
deleted file mode 100644
index 961c76e9f6..0000000000
--- a/compat/thrust/detail/tabulate.inl
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/tabulate.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/system/detail/generic/select_system.h>
-#include <thrust/system/detail/generic/tabulate.h>
-#include <thrust/system/detail/adl/tabulate.h>
-
-namespace thrust
-{
-
-
-template<typename DerivedPolicy, typename ForwardIterator, typename UnaryOperation>
-  void tabulate(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                ForwardIterator first,
-                ForwardIterator last,
-                UnaryOperation unary_op)
-{
-  using thrust::system::detail::generic::tabulate;
-  return tabulate(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, unary_op);
-} // end tabulate()
-
-
-template<typename ForwardIterator, typename UnaryOperation>
-  void tabulate(ForwardIterator first,
-                ForwardIterator last,
-                UnaryOperation unary_op)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<ForwardIterator>::type System;
-
-  System system;
-
-  return thrust::tabulate(select_system(system), first, last, unary_op);
-} // end tabulate()
-
-
-} // end namespace thrust
-
diff --git a/compat/thrust/detail/temporary_array.h b/compat/thrust/detail/temporary_array.h
deleted file mode 100644
index 3a9e08481c..0000000000
--- a/compat/thrust/detail/temporary_array.h
+++ /dev/null
@@ -1,158 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-/*! \file temporary_array.h
- *  \brief Container-like class temporary storage inside algorithms.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/iterator/retag.h>
-#include <thrust/detail/contiguous_storage.h>
-#include <thrust/detail/allocator/temporary_allocator.h>
-#include <thrust/detail/allocator/no_throw_allocator.h>
-#include <memory>
-
-namespace thrust
-{
-namespace detail
-{
-
-
-template<typename T, typename System>
-  class temporary_array
-    : public contiguous_storage<
-               T,
-               no_throw_allocator<
-                 temporary_allocator<T,System>
-               >
-             >
-{
-  private:
-    typedef contiguous_storage<
-      T,
-      no_throw_allocator<
-        temporary_allocator<T,System>
-      >
-    > super_t;
-
-    // to help out the constructor
-    typedef no_throw_allocator<temporary_allocator<T,System> > alloc_type;
-
-  public:
-    typedef typename super_t::size_type size_type;
-
-    temporary_array(thrust::execution_policy<System> &system, size_type n);
-
-    // provide a kill-switch to explicitly avoid initialization
-    temporary_array(int uninit, thrust::execution_policy<System> &system, size_type n);
-
-    template<typename InputIterator>
-    temporary_array(thrust::execution_policy<System> &system,
-                    InputIterator first,
-                    size_type n);
-
-    template<typename InputIterator, typename InputSystem>
-    temporary_array(thrust::execution_policy<System> &system,
-                    thrust::execution_policy<InputSystem> &input_system,
-                    InputIterator first,
-                    size_type n);
-
-    template<typename InputIterator>
-    temporary_array(thrust::execution_policy<System> &system,
-                    InputIterator first,
-                    InputIterator last);
-
-    template<typename InputSystem, typename InputIterator>
-    temporary_array(thrust::execution_policy<System> &system,
-                    thrust::execution_policy<InputSystem> &input_system,
-                    InputIterator first,
-                    InputIterator last);
-
-    ~temporary_array();
-}; // end temporary_array
-
-
-// XXX eliminate this when we do ranges for real
-template<typename Iterator, typename System>
-  class tagged_iterator_range
-{
-  public:
-    typedef thrust::detail::tagged_iterator<Iterator,System> iterator;
-
-    template<typename Ignored1, typename Ignored2>
-    tagged_iterator_range(const Ignored1 &, const Ignored2 &, Iterator first, Iterator last)
-      : m_begin(reinterpret_tag<System>(first)),
-        m_end(reinterpret_tag<System>(last))
-    {}
-
-    iterator begin(void) const { return m_begin; }
-    iterator end(void) const { return m_end; }
-
-  private:
-    iterator m_begin, m_end;
-};
-
-
-// if FromSystem is convertible to ToSystem, then just make a shallow
-// copy of the range. else, use a temporary_array
-// note that the resulting iterator is explicitly tagged with ToSystem either way
-template<typename Iterator, typename FromSystem, typename ToSystem>
-  struct move_to_system_base
-    : public eval_if<
-        is_convertible<
-          FromSystem,
-          ToSystem
-        >::value,
-        identity_<
-          tagged_iterator_range<Iterator,ToSystem>
-        >,
-        identity_<
-          temporary_array<
-            typename thrust::iterator_value<Iterator>::type,
-            ToSystem
-          >
-        >
-      >
-{};
-
-
-template<typename Iterator, typename FromSystem, typename ToSystem>
-  class move_to_system
-    : public move_to_system_base<
-        Iterator,
-        FromSystem,
-        ToSystem
-      >::type
-{
-  typedef typename move_to_system_base<Iterator,FromSystem,ToSystem>::type super_t;
-
-  public:
-    move_to_system(thrust::execution_policy<FromSystem> &from_system,
-                   thrust::execution_policy<ToSystem> &to_system,
-                   Iterator first,
-                   Iterator last)
-      : super_t(to_system, from_system, first, last) {}
-};
-
-
-} // end detail
-} // end thrust
-
-#include <thrust/detail/temporary_array.inl>
-
diff --git a/compat/thrust/detail/temporary_array.inl b/compat/thrust/detail/temporary_array.inl
deleted file mode 100644
index 36ed16736e..0000000000
--- a/compat/thrust/detail/temporary_array.inl
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/temporary_array.h>
-#include <thrust/distance.h>
-#include <thrust/system/detail/generic/select_system.h>
-#include <thrust/detail/type_traits.h>
-
-
-namespace thrust
-{
-
-namespace detail
-{
-namespace temporary_array_detail
-{
-
-
-template<typename T> struct avoid_initialization : thrust::detail::has_trivial_copy_constructor<T> {};
-
-
-template<typename T, typename TemporaryArray, typename Size>
-typename thrust::detail::enable_if<
-  avoid_initialization<T>::value
->::type
-  construct_values(TemporaryArray &,
-                   Size)
-{
-  // avoid the overhead of initialization
-} // end construct_values()
-
-
-template<typename T, typename TemporaryArray, typename Size>
-typename thrust::detail::disable_if<
-  avoid_initialization<T>::value
->::type
-  construct_values(TemporaryArray &a,
-                   Size n)
-{
-  a.default_construct_n(a.begin(), n);
-} // end construct_values()
-
-
-} // end temporary_array_detail
-
-
-template<typename T, typename System>
-  temporary_array<T,System>
-    ::temporary_array(thrust::execution_policy<System> &system, size_type n)
-      :super_t(n, alloc_type(temporary_allocator<T,System>(system)))
-{
-  temporary_array_detail::construct_values<T>(*this, n);
-} // end temporary_array::temporary_array()
-
-
-template<typename T, typename System>
-  temporary_array<T,System>
-    ::temporary_array(int, thrust::execution_policy<System> &system, size_type n)
-      :super_t(n, alloc_type(temporary_allocator<T,System>(system)))
-{
-  // avoid initialization
-  ;
-} // end temporary_array::temporary_array()
-
-
-template<typename T, typename System>
-  template<typename InputIterator>
-    temporary_array<T,System>
-      ::temporary_array(thrust::execution_policy<System> &system,
-                        InputIterator first,
-                        size_type n)
-        : super_t(alloc_type(temporary_allocator<T,System>(system)))
-{
-  super_t::allocate(n);
-
-  super_t::uninitialized_copy_n(system, first, n, super_t::begin());
-} // end temporary_array::temporary_array()
-
-
-template<typename T, typename System>
-  template<typename InputIterator, typename InputSystem>
-    temporary_array<T,System>
-      ::temporary_array(thrust::execution_policy<System> &system,
-                        thrust::execution_policy<InputSystem> &input_system,
-                        InputIterator first,
-                        size_type n)
-        : super_t(alloc_type(temporary_allocator<T,System>(system)))
-{
-  super_t::allocate(n);
-
-  super_t::uninitialized_copy_n(input_system, first, n, super_t::begin());
-} // end temporary_array::temporary_array()
-
-
-template<typename T, typename System>
-  template<typename InputIterator>
-    temporary_array<T,System>
-      ::temporary_array(thrust::execution_policy<System> &system,
-                        InputIterator first,
-                        InputIterator last)
-        : super_t(alloc_type(temporary_allocator<T,System>(system)))
-{
-  super_t::allocate(thrust::distance(first,last));
-
-  super_t::uninitialized_copy(system, first, last, super_t::begin());
-} // end temporary_array::temporary_array()
-
-
-template<typename T, typename System>
-  template<typename InputSystem, typename InputIterator>
-    temporary_array<T,System>
-      ::temporary_array(thrust::execution_policy<System> &system,
-                        thrust::execution_policy<InputSystem> &input_system,
-                        InputIterator first,
-                        InputIterator last)
-        : super_t(alloc_type(temporary_allocator<T,System>(system)))
-{
-  super_t::allocate(thrust::distance(first,last));
-
-  super_t::uninitialized_copy(input_system, first, last, super_t::begin());
-} // end temporary_array::temporary_array()
-
-
-template<typename T, typename System>
-  temporary_array<T,System>
-    ::~temporary_array()
-{
-  // note that super_t::destroy will ignore trivial destructors automatically
-  super_t::destroy(super_t::begin(), super_t::end());
-} // end temporary_array::~temporary_array()
-
-} // end detail
-
-} // end thrust
-
diff --git a/compat/thrust/detail/temporary_buffer.h b/compat/thrust/detail/temporary_buffer.h
deleted file mode 100644
index 046a3b363a..0000000000
--- a/compat/thrust/detail/temporary_buffer.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/execution_policy.h>
-#include <thrust/pair.h>
-#include <thrust/detail/pointer.h>
-#include <thrust/detail/raw_pointer_cast.h>
-#include <thrust/system/detail/generic/temporary_buffer.h>
-#include <thrust/system/detail/adl/temporary_buffer.h>
-
-namespace thrust
-{
-namespace detail
-{
-namespace get_temporary_buffer_detail
-{
-
-
-template<typename T, typename DerivedPolicy, typename Pair>
-  thrust::pair<thrust::pointer<T,DerivedPolicy>, typename thrust::pointer<T,DerivedPolicy>::difference_type>
-    down_cast_pair(Pair p)
-{
-  // XXX should use a hypothetical thrust::static_pointer_cast here
-  thrust::pointer<T,DerivedPolicy> ptr = thrust::pointer<T,DerivedPolicy>(static_cast<T*>(thrust::raw_pointer_cast(p.first)));
-
-  typedef thrust::pair<thrust::pointer<T,DerivedPolicy>, typename thrust::pointer<T,DerivedPolicy>::difference_type> result_type;
-  return result_type(ptr, p.second);
-} // end down_cast_pair()
-
-
-} // end get_temporary_buffer_detail
-} // end detail
-
-
-template<typename T, typename DerivedPolicy>
-  thrust::pair<thrust::pointer<T,DerivedPolicy>, typename thrust::pointer<T,DerivedPolicy>::difference_type>
-    get_temporary_buffer(const thrust::detail::execution_policy_base<DerivedPolicy> &exec, typename thrust::pointer<T,DerivedPolicy>::difference_type n)
-{
-  using thrust::system::detail::generic::get_temporary_buffer;
-
-  return thrust::detail::get_temporary_buffer_detail::down_cast_pair<T,DerivedPolicy>(get_temporary_buffer<T>(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), n));
-} // end get_temporary_buffer()
-
-
-template<typename DerivedPolicy, typename Pointer>
-  void return_temporary_buffer(const thrust::detail::execution_policy_base<DerivedPolicy> &exec, Pointer p)
-{
-  using thrust::system::detail::generic::return_temporary_buffer;
-
-  return return_temporary_buffer(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), p);
-} // end return_temporary_buffer()
-
-
-} // end thrust
-
diff --git a/compat/thrust/detail/transform.inl b/compat/thrust/detail/transform.inl
deleted file mode 100644
index ae303bcc0d..0000000000
--- a/compat/thrust/detail/transform.inl
+++ /dev/null
@@ -1,239 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file transform.inl
- *  \brief Inline file for transform.h.
- */
-
-#include <thrust/transform.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/system/detail/generic/select_system.h>
-#include <thrust/system/detail/generic/transform.h>
-#include <thrust/system/detail/adl/transform.h>
-
-namespace thrust
-{
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename UnaryFunction>
-  OutputIterator transform(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                           InputIterator first, InputIterator last,
-                           OutputIterator result,
-                           UnaryFunction op)
-{
-  using thrust::system::detail::generic::transform;
-  return transform(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, op);
-} // end transform()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename BinaryFunction>
-  OutputIterator transform(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                           InputIterator1 first1, InputIterator1 last1,
-                           InputIterator2 first2,
-                           OutputIterator result,
-                           BinaryFunction op)
-{
-  using thrust::system::detail::generic::transform;
-  return transform(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, result, op);
-} // end transform()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename ForwardIterator,
-         typename UnaryFunction,
-         typename Predicate>
-  ForwardIterator transform_if(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                               InputIterator first, InputIterator last,
-                               ForwardIterator result,
-                               UnaryFunction op,
-                               Predicate pred)
-{
-  using thrust::system::detail::generic::transform_if;
-  return transform_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, op, pred);
-} // end transform_if()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename ForwardIterator,
-         typename UnaryFunction,
-         typename Predicate>
-  ForwardIterator transform_if(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                               InputIterator1 first, InputIterator1 last,
-                               InputIterator2 stencil,
-                               ForwardIterator result,
-                               UnaryFunction op,
-                               Predicate pred)
-{
-  using thrust::system::detail::generic::transform_if;
-  return transform_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, stencil, result, op, pred);
-} // end transform_if()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename ForwardIterator,
-         typename BinaryFunction,
-         typename Predicate>
-  ForwardIterator transform_if(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                               InputIterator1 first1, InputIterator1 last1,
-                               InputIterator2 first2,
-                               InputIterator3 stencil,
-                               ForwardIterator result,
-                               BinaryFunction binary_op,
-                               Predicate pred)
-{
-  using thrust::system::detail::generic::transform_if;
-  return transform_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, stencil, result, binary_op, pred);
-} // end transform_if()
-
-
-template<typename InputIterator,
-         typename OutputIterator,
-         typename UnaryFunction>
-  OutputIterator transform(InputIterator first,
-                           InputIterator last,
-                           OutputIterator result,
-                           UnaryFunction op)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator>::type  System1;
-  typedef typename thrust::iterator_system<OutputIterator>::type System2;
-
-  System1 system1;
-  System2 system2;
-
-  return thrust::transform(select_system(system1,system2), first, last, result, op);
-} // end transform()
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename BinaryFunction>
-  OutputIterator transform(InputIterator1 first1,
-                           InputIterator1 last1,
-                           InputIterator2 first2,
-                           OutputIterator result,
-                           BinaryFunction op)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type System1;
-  typedef typename thrust::iterator_system<InputIterator2>::type System2;
-  typedef typename thrust::iterator_system<OutputIterator>::type System3;
-
-  System1 system1;
-  System2 system2;
-  System3 system3;
-
-  return thrust::transform(select_system(system1,system2,system3), first1, last1, first2, result, op);
-} // end transform()
-
-
-template<typename InputIterator,
-         typename ForwardIterator,
-         typename UnaryFunction,
-         typename Predicate>
-  ForwardIterator transform_if(InputIterator first,
-                               InputIterator last,
-                               ForwardIterator result,
-                               UnaryFunction unary_op,
-                               Predicate pred)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator>::type   System1;
-  typedef typename thrust::iterator_system<ForwardIterator>::type System2;
-
-  System1 system1;
-  System2 system2;
-
-  return thrust::transform_if(select_system(system1,system2), first, last, result, unary_op, pred);
-} // end transform_if()
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename ForwardIterator,
-         typename UnaryFunction,
-         typename Predicate>
-  ForwardIterator transform_if(InputIterator1 first,
-                               InputIterator1 last,
-                               InputIterator2 stencil,
-                               ForwardIterator result,
-                               UnaryFunction unary_op,
-                               Predicate pred)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type  System1;
-  typedef typename thrust::iterator_system<InputIterator2>::type  System2;
-  typedef typename thrust::iterator_system<ForwardIterator>::type System3;
-
-  System1 system1;
-  System2 system2;
-  System3 system3;
-
-  return thrust::transform_if(select_system(system1,system2,system3), first, last, stencil, result, unary_op, pred);
-} // end transform_if()
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename ForwardIterator,
-         typename BinaryFunction,
-         typename Predicate>
-  ForwardIterator transform_if(InputIterator1 first1,
-                               InputIterator1 last1,
-                               InputIterator2 first2,
-                               InputIterator3 stencil,
-                               ForwardIterator result,
-                               BinaryFunction binary_op,
-                               Predicate pred)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type  System1;
-  typedef typename thrust::iterator_system<InputIterator2>::type  System2;
-  typedef typename thrust::iterator_system<InputIterator3>::type  System3;
-  typedef typename thrust::iterator_system<ForwardIterator>::type System4;
-
-  System1 system1;
-  System2 system2;
-  System3 system3;
-  System4 system4;
-
-  return thrust::transform_if(select_system(system1,system2,system3,system4), first1, last1, first2, stencil, result, binary_op, pred);
-} // end transform_if()
-
-
-} // end namespace thrust
-
diff --git a/compat/thrust/detail/transform_reduce.inl b/compat/thrust/detail/transform_reduce.inl
deleted file mode 100644
index ede65030b1..0000000000
--- a/compat/thrust/detail/transform_reduce.inl
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file transform_reduce.inl
- *  \brief Inline file for transform_reduce.h.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/system/detail/generic/select_system.h>
-#include <thrust/system/detail/generic/transform_reduce.h>
-#include <thrust/system/detail/adl/transform_reduce.h>
-
-namespace thrust
-{
-
-
-template<typename DerivedPolicy,
-         typename InputIterator, 
-         typename UnaryFunction, 
-         typename OutputType,
-         typename BinaryFunction>
-  OutputType transform_reduce(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                              InputIterator first,
-                              InputIterator last,
-                              UnaryFunction unary_op,
-                              OutputType init,
-                              BinaryFunction binary_op)
-{
-  using thrust::system::detail::generic::transform_reduce;
-  return transform_reduce(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, unary_op, init, binary_op);
-} // end transform_reduce()
-
-
-template<typename InputIterator, 
-         typename UnaryFunction, 
-         typename OutputType,
-         typename BinaryFunction>
-  OutputType transform_reduce(InputIterator first,
-                              InputIterator last,
-                              UnaryFunction unary_op,
-                              OutputType init,
-                              BinaryFunction binary_op)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator>::type System;
-
-  System system;
-
-  return thrust::transform_reduce(select_system(system), first, last, unary_op, init, binary_op);
-} // end transform_reduce()
-
-
-} // end namespace thrust
-
diff --git a/compat/thrust/detail/transform_scan.inl b/compat/thrust/detail/transform_scan.inl
deleted file mode 100644
index 0187c4b98d..0000000000
--- a/compat/thrust/detail/transform_scan.inl
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file transform_scan.inl
- *  \brief Inline file for transform_scan.h.
- */
-
-#include <thrust/scan.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/system/detail/generic/select_system.h>
-#include <thrust/system/detail/generic/transform_scan.h>
-#include <thrust/system/detail/adl/transform_scan.h>
-
-namespace thrust
-{
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename UnaryFunction,
-         typename AssociativeOperator>
-  OutputIterator transform_inclusive_scan(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                          InputIterator first,
-                                          InputIterator last,
-                                          OutputIterator result,
-                                          UnaryFunction unary_op,
-                                          AssociativeOperator binary_op)
-{
-  using thrust::system::detail::generic::transform_inclusive_scan;
-  return transform_inclusive_scan(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, unary_op, binary_op);
-} // end transform_inclusive_scan()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename UnaryFunction,
-         typename T,
-         typename AssociativeOperator>
-  OutputIterator transform_exclusive_scan(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                          InputIterator first,
-                                          InputIterator last,
-                                          OutputIterator result,
-                                          UnaryFunction unary_op,
-                                          T init,
-                                          AssociativeOperator binary_op)
-{
-  using thrust::system::detail::generic::transform_exclusive_scan;
-  return transform_exclusive_scan(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, unary_op, init, binary_op);
-} // end transform_exclusive_scan()
-
-
-template<typename InputIterator,
-         typename OutputIterator,
-         typename UnaryFunction,
-         typename BinaryFunction>
-  OutputIterator transform_inclusive_scan(InputIterator first,
-                                          InputIterator last,
-                                          OutputIterator result,
-                                          UnaryFunction unary_op,
-                                          BinaryFunction binary_op)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator>::type  System1;
-  typedef typename thrust::iterator_system<OutputIterator>::type System2;
-
-  System1 system1;
-  System2 system2;
-
-  return thrust::transform_inclusive_scan(select_system(system1,system2), first, last, result, unary_op, binary_op);
-} // end transform_inclusive_scan()
-
-
-template<typename InputIterator,
-         typename OutputIterator,
-         typename UnaryFunction,
-         typename T,
-         typename AssociativeOperator>
-  OutputIterator transform_exclusive_scan(InputIterator first,
-                                          InputIterator last,
-                                          OutputIterator result,
-                                          UnaryFunction unary_op,
-                                          T init,
-                                          AssociativeOperator binary_op)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator>::type  System1;
-  typedef typename thrust::iterator_system<OutputIterator>::type System2;
-
-  System1 system1;
-  System2 system2;
-
-  return thrust::transform_exclusive_scan(select_system(system1,system2), first, last, result, unary_op, init, binary_op);
-} // end transform_exclusive_scan()
-
-
-} // end namespace thrust
-
diff --git a/compat/thrust/detail/trivial_sequence.h b/compat/thrust/detail/trivial_sequence.h
deleted file mode 100644
index cc7e32be27..0000000000
--- a/compat/thrust/detail/trivial_sequence.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-/*! \file trivial_sequence.h
- *  \brief Container-like class for wrapping sequences.  The wrapped
- *         sequence always has trivial iterators, even when the input
- *         sequence does not.
- */
-
-
-#pragma once
-
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/detail/execution_policy.h>
-#include <thrust/detail/temporary_array.h>
-
-namespace thrust
-{
-
-namespace detail
-{
-
-// never instantiated
-template<typename Iterator, typename DerivedPolicy, typename is_trivial> struct _trivial_sequence { };
-
-// trivial case
-template<typename Iterator, typename DerivedPolicy>
-struct _trivial_sequence<Iterator, DerivedPolicy, thrust::detail::true_type>
-{
-    typedef Iterator iterator_type;
-    Iterator first, last;
-
-    _trivial_sequence(thrust::execution_policy<DerivedPolicy> &, Iterator _first, Iterator _last) : first(_first), last(_last)
-    {
-//        std::cout << "trivial case" << std::endl;
-    }
-
-    iterator_type begin() { return first; }
-    iterator_type end()   { return last; }
-};
-
-// non-trivial case
-template<typename Iterator, typename DerivedPolicy>
-struct _trivial_sequence<Iterator, DerivedPolicy, thrust::detail::false_type>
-{
-    typedef typename thrust::iterator_value<Iterator>::type iterator_value;
-    typedef typename thrust::detail::temporary_array<iterator_value, DerivedPolicy>::iterator iterator_type;
-    
-    thrust::detail::temporary_array<iterator_value, DerivedPolicy> buffer;
-
-    _trivial_sequence(thrust::execution_policy<DerivedPolicy> &exec, Iterator first, Iterator last)
-      : buffer(exec, first, last)
-    {
-//        std::cout << "non-trivial case" << std::endl;
-    }
-
-    iterator_type begin() { return buffer.begin(); }
-    iterator_type end()   { return buffer.end(); }
-};
-
-template <typename Iterator, typename DerivedPolicy>
-struct trivial_sequence
-  : detail::_trivial_sequence<Iterator, DerivedPolicy, typename thrust::detail::is_trivial_iterator<Iterator>::type>
-{
-    typedef _trivial_sequence<Iterator, DerivedPolicy, typename thrust::detail::is_trivial_iterator<Iterator>::type> super_t;
-
-    trivial_sequence(thrust::execution_policy<DerivedPolicy> &exec, Iterator first, Iterator last) : super_t(exec, first, last) { }
-};
-
-} // end namespace detail
-
-} // end namespace thrust
-
diff --git a/compat/thrust/detail/tuple.inl b/compat/thrust/detail/tuple.inl
deleted file mode 100644
index 067ad636c4..0000000000
--- a/compat/thrust/detail/tuple.inl
+++ /dev/null
@@ -1,948 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/type_traits.h>
-#include <thrust/detail/swap.h>
-
-namespace thrust
-{
-
-// define null_type
-struct null_type {};
-
-// null_type comparisons
-__host__ __device__ inline
-bool operator==(const null_type&, const null_type&) { return true; }
-
-__host__ __device__ inline
-bool operator>=(const null_type&, const null_type&) { return true; }
-
-__host__ __device__ inline
-bool operator<=(const null_type&, const null_type&) { return true; }
-
-__host__ __device__ inline
-bool operator!=(const null_type&, const null_type&) { return false; }
-
-__host__ __device__ inline
-bool operator<(const null_type&, const null_type&) { return false; }
-
-__host__ __device__ inline
-bool operator>(const null_type&, const null_type&) { return false; }
-
-// forward declaration for tuple
-template <
-  class T0 = null_type, class T1 = null_type, class T2 = null_type,
-  class T3 = null_type, class T4 = null_type, class T5 = null_type,
-  class T6 = null_type, class T7 = null_type, class T8 = null_type,
-  class T9 = null_type>
-class tuple;
-
-// forward declaration of tuple_element
-template<int i, typename T> struct tuple_element;
-
-// specializations for tuple_element
-template<class T>
-  struct tuple_element<0,T>
-{
-  typedef typename T::head_type type;
-}; // end tuple_element<0,T>
-
-template<int N, class T>
-  struct tuple_element<N, const T>
-{
-  private:
-    typedef typename T::tail_type Next;
-    typedef typename tuple_element<N-1, Next>::type unqualified_type;
-
-  public:
-    typedef typename thrust::detail::add_const<unqualified_type>::type type;
-}; // end tuple_element<N, const T>
-
-template<class T>
-  struct tuple_element<0,const T>
-{
-  typedef typename thrust::detail::add_const<typename T::head_type>::type type;
-}; // end tuple_element<0,const T>
-
-
-
-// forward declaration of tuple_size
-template<class T> struct tuple_size;
-
-// specializations for tuple_size
-template<>
-  struct tuple_size< tuple<> >
-{
-  static const int value = 0;
-}; // end tuple_size< tuple<> >
-
-template<>
-  struct tuple_size<null_type>
-{
-  static const int value = 0;
-}; // end tuple_size<null_type>
-
-
-
-// forward declaration of detail::cons
-namespace detail
-{
-
-template <class HT, class TT> struct cons;
-
-} // end detail
-
-
-// -- some traits classes for get functions
-template <class T> struct access_traits
-{
-  typedef const T& const_type;
-  typedef T& non_const_type;
-
-  typedef const typename thrust::detail::remove_cv<T>::type& parameter_type;
-
-// used as the tuple constructors parameter types
-// Rationale: non-reference tuple element types can be cv-qualified.
-// It should be possible to initialize such types with temporaries,
-// and when binding temporaries to references, the reference must
-// be non-volatile and const. 8.5.3. (5)
-}; // end access_traits
-
-template <class T> struct access_traits<T&>
-{
-  typedef T& const_type;
-  typedef T& non_const_type;
-
-  typedef T& parameter_type;
-}; // end access_traits<T&>
-
-// forward declarations of get()
-template<int N, class HT, class TT>
-__host__ __device__
-inline typename access_traits<
-                  typename tuple_element<N, detail::cons<HT, TT> >::type
-                >::non_const_type
-// XXX we probably don't need to do this for any compiler we care about -jph
-//get(cons<HT, TT>& c BOOST_APPEND_EXPLICIT_TEMPLATE_NON_TYPE(int, N));
-get(detail::cons<HT, TT>& c);
-
-template<int N, class HT, class TT>
-__host__ __device__
-inline typename access_traits<
-                  typename tuple_element<N, detail::cons<HT, TT> >::type
-                >::const_type
-// XXX we probably don't need to do this for any compiler we care about -jph
-//get(const cons<HT, TT>& c BOOST_APPEND_EXPLICIT_TEMPLATE_NON_TYPE(int, N));
-get(const detail::cons<HT, TT>& c);
-
-namespace detail
-{
-
-// -- generate error template, referencing to non-existing members of this
-// template is used to produce compilation errors intentionally
-template<class T>
-class generate_error;
-
-// - cons getters --------------------------------------------------------
-// called: get_class<N>::get<RETURN_TYPE>(aTuple)
-
-template< int N >
-struct get_class
-{
-  template<class RET, class HT, class TT >
-  __host__ __device__
-  inline static RET get(const cons<HT, TT>& t)
-  {
-    // XXX we may not need to deal with this for any compiler we care about -jph
-    //return get_class<N-1>::BOOST_NESTED_TEMPLATE get<RET>(t.tail);
-    return get_class<N-1>::template get<RET>(t.tail);
-    
-    // gcc 4.3 couldn't compile this:
-    //return get_class<N-1>::get<RET>(t.tail);
-  }
-
-  template<class RET, class HT, class TT >
-  __host__ __device__
-  inline static RET get(cons<HT, TT>& t)
-  {
-    // XXX we may not need to deal with this for any compiler we care about -jph
-    //return get_class<N-1>::BOOST_NESTED_TEMPLATE get<RET>(t.tail);
-    return get_class<N-1>::template get<RET>(t.tail);
-
-    // gcc 4.3 couldn't compile this:
-    //return get_class<N-1>::get<RET>(t.tail);
-  }
-}; // end get_class
-
-template<>
-struct get_class<0>
-{
-  template<class RET, class HT, class TT>
-  __host__ __device__
-  inline static RET get(const cons<HT, TT>& t)
-  {
-    return t.head;
-  }
-
-  template<class RET, class HT, class TT>
-  __host__ __device__
-  inline static RET get(cons<HT, TT>& t)
-  {
-    return t.head;
-  }
-}; // get get_class<0>
-
-
-template <bool If, class Then, class Else> struct IF
-{
-  typedef Then RET;
-};
-
-template <class Then, class Else> struct IF<false, Then, Else>
-{
-  typedef Else RET;
-};
-
-//  These helper templates wrap void types and plain function types.
-//  The rationale is to allow one to write tuple types with those types
-//  as elements, even though it is not possible to instantiate such object.
-//  E.g: typedef tuple<void> some_type; // ok
-//  but: some_type x; // fails
-
-template <class T> class non_storeable_type
-{
-  __host__ __device__
-  non_storeable_type();
-};
-
-template <class T> struct wrap_non_storeable_type
-{
-  // XXX is_function looks complicated; punt for now -jph
-  //typedef typename IF<
-  //  ::thrust::detail::is_function<T>::value, non_storeable_type<T>, T
-  //>::RET type;
-
-  typedef T type;
-};
-
-template <> struct wrap_non_storeable_type<void>
-{
-  typedef non_storeable_type<void> type;
-};
-
-
-template <class HT, class TT>
-  struct cons
-{
-  typedef HT head_type;
-  typedef TT tail_type;
-
-  typedef typename
-    wrap_non_storeable_type<head_type>::type stored_head_type;
-
-  stored_head_type head;
-  tail_type tail;
-
-  inline __host__ __device__
-  typename access_traits<stored_head_type>::non_const_type
-  get_head() { return head; }
-
-  inline __host__ __device__
-  typename access_traits<tail_type>::non_const_type
-  get_tail() { return tail; }
-
-  inline __host__ __device__
-  typename access_traits<stored_head_type>::const_type
-  get_head() const { return head; }
-
-  inline __host__ __device__
-  typename access_traits<tail_type>::const_type
-  get_tail() const { return tail; }
-
-  inline __host__ __device__
-  cons(void) : head(), tail() {}
-  //  cons() : head(detail::default_arg<HT>::f()), tail() {}
-
-  // the argument for head is not strictly needed, but it prevents
-  // array type elements. This is good, since array type elements
-  // cannot be supported properly in any case (no assignment,
-  // copy works only if the tails are exactly the same type, ...)
-
-  inline __host__ __device__
-  cons(typename access_traits<stored_head_type>::parameter_type h,
-       const tail_type& t)
-    : head (h), tail(t) {}
-
-  template <class T1, class T2, class T3, class T4, class T5,
-            class T6, class T7, class T8, class T9, class T10>
-  inline __host__ __device__
-  cons( T1& t1, T2& t2, T3& t3, T4& t4, T5& t5,
-        T6& t6, T7& t7, T8& t8, T9& t9, T10& t10 )
-    : head (t1),
-      tail (t2, t3, t4, t5, t6, t7, t8, t9, t10, static_cast<const null_type&>(null_type()))
-      {}
-
-  template <class T2, class T3, class T4, class T5,
-            class T6, class T7, class T8, class T9, class T10>
-  inline __host__ __device__
-  cons( const null_type& /*t1*/, T2& t2, T3& t3, T4& t4, T5& t5,
-        T6& t6, T7& t7, T8& t8, T9& t9, T10& t10 )
-    : head (),
-      tail (t2, t3, t4, t5, t6, t7, t8, t9, t10, static_cast<const null_type&>(null_type()))
-      {}
-
-
-  template <class HT2, class TT2>
-  inline __host__ __device__
-  cons( const cons<HT2, TT2>& u ) : head(u.head), tail(u.tail) {}
-
-  template <class HT2, class TT2>
-  inline __host__ __device__
-  cons& operator=( const cons<HT2, TT2>& u ) {
-    head=u.head; tail=u.tail; return *this;
-  }
-
-  // must define assignment operator explicitly, implicit version is
-  // illformed if HT is a reference (12.8. (12))
-  inline __host__ __device__
-  cons& operator=(const cons& u) {
-    head = u.head; tail = u.tail;  return *this;
-  }
-
-  // XXX enable when we support std::pair -jph
-  //template <class T1, class T2>
-  //__host__ __device__
-  //cons& operator=( const std::pair<T1, T2>& u ) {
-  //  //BOOST_STATIC_ASSERT(length<cons>::value == 2); // check length = 2
-  //  head = u.first; tail.head = u.second; return *this;
-  //}
-
-  // get member functions (non-const and const)
-  template <int N>
-  __host__ __device__
-  typename access_traits<
-             typename tuple_element<N, cons<HT, TT> >::type
-           >::non_const_type
-  get() {
-    return thrust::get<N>(*this); // delegate to non-member get
-  }
-
-  template <int N>
-  __host__ __device__
-  typename access_traits<
-             typename tuple_element<N, cons<HT, TT> >::type
-           >::const_type
-  get() const {
-    return thrust::get<N>(*this); // delegate to non-member get
-  }
-
-  inline __host__ __device__
-  void swap(cons &c)
-  {
-    using thrust::swap;
-
-    swap(head, c.head);
-    tail.swap(c.tail);
-  }
-};
-
-template <class HT>
-  struct cons<HT, null_type>
-{
-  typedef HT head_type;
-  typedef null_type tail_type;
-  typedef cons<HT, null_type> self_type;
-
-  typedef typename
-    wrap_non_storeable_type<head_type>::type stored_head_type;
-  stored_head_type head;
-
-  typename access_traits<stored_head_type>::non_const_type
-  inline __host__ __device__
-  get_head() { return head; }
-
-  inline __host__ __device__
-  null_type get_tail() { return null_type(); }
-
-  inline __host__ __device__
-  typename access_traits<stored_head_type>::const_type
-  get_head() const { return head; }
-
-  inline __host__ __device__
-  null_type get_tail() const { return null_type(); }
-
-  inline __host__ __device__
-  cons() : head() {}
-
-  inline __host__ __device__
-  cons(typename access_traits<stored_head_type>::parameter_type h,
-       const null_type& = null_type())
-    : head (h) {}
-
-  template<class T1>
-  inline __host__ __device__
-  cons(T1& t1, const null_type&, const null_type&, const null_type&,
-       const null_type&, const null_type&, const null_type&,
-       const null_type&, const null_type&, const null_type&)
-  : head (t1) {}
-
-  inline __host__ __device__
-  cons(const null_type&,
-       const null_type&, const null_type&, const null_type&,
-       const null_type&, const null_type&, const null_type&,
-       const null_type&, const null_type&, const null_type&)
-  : head () {}
-
-  template <class HT2>
-  inline __host__ __device__
-  cons( const cons<HT2, null_type>& u ) : head(u.head) {}
-
-  template <class HT2>
-  inline __host__ __device__
-  cons& operator=(const cons<HT2, null_type>& u )
-  {
-    head = u.head;
-    return *this;
-  }
-
-  // must define assignment operator explicitly, implicit version
-  // is illformed if HT is a reference
-  inline __host__ __device__
-  cons& operator=(const cons& u) { head = u.head; return *this; }
-
-  template <int N>
-  inline __host__ __device__
-  typename access_traits<
-             typename tuple_element<N, self_type>::type
-            >::non_const_type
-  // XXX we probably don't need this for the compilers we care about -jph
-  //get(BOOST_EXPLICIT_TEMPLATE_NON_TYPE(int, N))
-  get(void)
-  {
-    return thrust::get<N>(*this);
-  }
-
-  template <int N>
-  inline __host__ __device__
-  typename access_traits<
-             typename tuple_element<N, self_type>::type
-           >::const_type
-  // XXX we probably don't need this for the compilers we care about -jph
-  //get(BOOST_EXPLICIT_TEMPLATE_NON_TYPE(int, N)) const
-  get(void) const
-  {
-    return thrust::get<N>(*this);
-  }
-
-  inline __host__ __device__
-  void swap(cons &c)
-  {
-    using thrust::swap;
-
-    swap(head, c.head);
-  }
-}; // end cons
-
-template <class T0, class T1, class T2, class T3, class T4,
-          class T5, class T6, class T7, class T8, class T9>
-  struct map_tuple_to_cons
-{
-  typedef cons<T0,
-               typename map_tuple_to_cons<T1, T2, T3, T4, T5,
-                                          T6, T7, T8, T9, null_type>::type
-              > type;
-}; // end map_tuple_to_cons
-
-// The empty tuple is a null_type
-template <>
-  struct map_tuple_to_cons<null_type, null_type, null_type, null_type, null_type, null_type, null_type, null_type, null_type, null_type>
-{
-  typedef null_type type;
-}; // end map_tuple_to_cons<...>
-
-
-
-// ---------------------------------------------------------------------------
-// The call_traits for make_tuple
-
-// Must be instantiated with plain or const plain types (not with references)
-
-// from template<class T> foo(const T& t) : make_tuple_traits<const T>::type
-// from template<class T> foo(T& t) : make_tuple_traits<T>::type
-
-// Conversions:
-// T -> T,
-// references -> compile_time_error
-// array -> const ref array
-
-
-template<class T>
-struct make_tuple_traits {
-  typedef T type;
-
-  // commented away, see below  (JJ)
-  //  typedef typename IF<
-  //  boost::is_function<T>::value,
-  //  T&,
-  //  T>::RET type;
-
-};
-
-// The is_function test was there originally for plain function types,
-// which can't be stored as such (we must either store them as references or
-// pointers). Such a type could be formed if make_tuple was called with a
-// reference to a function.
-// But this would mean that a const qualified function type was formed in
-// the make_tuple function and hence make_tuple can't take a function
-// reference as a parameter, and thus T can't be a function type.
-// So is_function test was removed.
-// (14.8.3. says that type deduction fails if a cv-qualified function type
-// is created. (It only applies for the case of explicitly specifying template
-// args, though?)) (JJ)
-
-template<class T>
-struct make_tuple_traits<T&> {
-  typedef typename
-     detail::generate_error<T&>::
-       do_not_use_with_reference_type error;
-};
-
-// Arrays can't be stored as plain types; convert them to references.
-// All arrays are converted to const. This is because make_tuple takes its
-// parameters as const T& and thus the knowledge of the potential
-// non-constness of actual argument is lost.
-template<class T, int n>  struct make_tuple_traits <T[n]> {
-  typedef const T (&type)[n];
-};
-
-template<class T, int n>
-struct make_tuple_traits<const T[n]> {
-  typedef const T (&type)[n];
-};
-
-template<class T, int n>  struct make_tuple_traits<volatile T[n]> {
-  typedef const volatile T (&type)[n];
-};
-
-template<class T, int n>
-struct make_tuple_traits<const volatile T[n]> {
-  typedef const volatile T (&type)[n];
-};
-
-// XXX enable these if we ever care about reference_wrapper -jph
-//template<class T>
-//struct make_tuple_traits<reference_wrapper<T> >{
-//  typedef T& type;
-//};
-//
-//template<class T>
-//struct make_tuple_traits<const reference_wrapper<T> >{
-//  typedef T& type;
-//};
-
-
-// a helper traits to make the make_tuple functions shorter (Vesa Karvonen's
-// suggestion)
-template <
-  class T0 = null_type, class T1 = null_type, class T2 = null_type,
-  class T3 = null_type, class T4 = null_type, class T5 = null_type,
-  class T6 = null_type, class T7 = null_type, class T8 = null_type,
-  class T9 = null_type
->
-struct make_tuple_mapper {
-  typedef
-    tuple<typename make_tuple_traits<T0>::type,
-          typename make_tuple_traits<T1>::type,
-          typename make_tuple_traits<T2>::type,
-          typename make_tuple_traits<T3>::type,
-          typename make_tuple_traits<T4>::type,
-          typename make_tuple_traits<T5>::type,
-          typename make_tuple_traits<T6>::type,
-          typename make_tuple_traits<T7>::type,
-          typename make_tuple_traits<T8>::type,
-          typename make_tuple_traits<T9>::type> type;
-};
-
-} // end detail
-
-
-template<int N, class HT, class TT>
-__host__ __device__
-inline typename access_traits<
-                  typename tuple_element<N, detail::cons<HT, TT> >::type
-                >::non_const_type
-get(detail::cons<HT, TT>& c)
-{
-  //return detail::get_class<N>::BOOST_NESTED_TEMPLATE
-  
-  // gcc 4.3 couldn't compile this:
-  //return detail::get_class<N>::
-
-  return detail::get_class<N>::template
-         get<
-           typename access_traits<
-             typename tuple_element<N, detail::cons<HT, TT> >::type
-           >::non_const_type,
-           HT,TT
-         >(c);
-}
-
-
-// get function for const cons-lists, returns a const reference to
-// the element. If the element is a reference, returns the reference
-// as such (that is, can return a non-const reference)
-template<int N, class HT, class TT>
-__host__ __device__
-inline typename access_traits<
-                  typename tuple_element<N, detail::cons<HT, TT> >::type
-                >::const_type
-get(const detail::cons<HT, TT>& c)
-{
-  //return detail::get_class<N>::BOOST_NESTED_TEMPLATE
-
-  // gcc 4.3 couldn't compile this:
-  //return detail::get_class<N>::
-
-  return detail::get_class<N>::template
-         get<
-           typename access_traits<
-             typename tuple_element<N, detail::cons<HT, TT> >::type
-           >::const_type,
-           HT,TT
-         >(c);
-}
-
-
-template<class T0>
-__host__ __device__ inline
-  typename detail::make_tuple_mapper<T0>::type
-    make_tuple(const T0& t0)
-{
-  typedef typename detail::make_tuple_mapper<T0>::type t;
-  return t(t0);
-} // end make_tuple()
-
-template<class T0, class T1>
-__host__ __device__ inline
-  typename detail::make_tuple_mapper<T0, T1>::type
-    make_tuple(const T0& t0, const T1& t1)
-{
-  typedef typename detail::make_tuple_mapper<T0,T1>::type t;
-  return t(t0,t1);
-} // end make_tuple()
-
-template<class T0, class T1, class T2>
-__host__ __device__ inline
-  typename detail::make_tuple_mapper<T0, T1, T2>::type
-    make_tuple(const T0& t0, const T1& t1, const T2& t2)
-{
-  typedef typename detail::make_tuple_mapper<T0,T1,T2>::type t;
-  return t(t0,t1,t2);
-} // end make_tuple()
-
-template<class T0, class T1, class T2, class T3>
-__host__ __device__ inline
-  typename detail::make_tuple_mapper<T0, T1, T2, T3>::type
-    make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3)
-{
-  typedef typename detail::make_tuple_mapper<T0,T1,T2,T3>::type t;
-  return t(t0,t1,t2,t3);
-} // end make_tuple()
-
-template<class T0, class T1, class T2, class T3, class T4>
-__host__ __device__ inline
-  typename detail::make_tuple_mapper<T0, T1, T2, T3, T4>::type
-    make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4)
-{
-  typedef typename detail::make_tuple_mapper<T0,T1,T2,T3,T4>::type t;
-  return t(t0,t1,t2,t3,t4);
-} // end make_tuple()
-
-template<class T0, class T1, class T2, class T3, class T4, class T5>
-__host__ __device__ inline
-  typename detail::make_tuple_mapper<T0, T1, T2, T3, T4, T5>::type
-    make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4, const T5& t5)
-{
-  typedef typename detail::make_tuple_mapper<T0,T1,T2,T3,T4,T5>::type t;
-  return t(t0,t1,t2,t3,t4,t5);
-} // end make_tuple()
-
-template<class T0, class T1, class T2, class T3, class T4, class T5, class T6>
-__host__ __device__ inline
-  typename detail::make_tuple_mapper<T0, T1, T2, T3, T4, T5, T6>::type
-    make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4, const T5& t5, const T6& t6)
-{
-  typedef typename detail::make_tuple_mapper<T0,T1,T2,T3,T4,T5,T6>::type t;
-  return t(t0,t1,t2,t3,t4,t5,t6);
-} // end make_tuple()
-
-template<class T0, class T1, class T2, class T3, class T4, class T5, class T6, class T7>
-__host__ __device__ inline
-  typename detail::make_tuple_mapper<T0, T1, T2, T3, T4, T5, T6, T7>::type
-    make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4, const T5& t5, const T6& t6, const T7& t7)
-{
-  typedef typename detail::make_tuple_mapper<T0,T1,T2,T3,T4,T5,T6,T7>::type t;
-  return t(t0,t1,t2,t3,t4,t5,t6,t7);
-} // end make_tuple()
-
-template<class T0, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8>
-__host__ __device__ inline
-  typename detail::make_tuple_mapper<T0, T1, T2, T3, T4, T5, T6, T7, T8>::type
-    make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4, const T5& t5, const T6& t6, const T7& t7, const T8& t8)
-{
-  typedef typename detail::make_tuple_mapper<T0,T1,T2,T3,T4,T5,T6,T7,T8>::type t;
-  return t(t0,t1,t2,t3,t4,t5,t6,t7,t8);
-} // end make_tuple()
-
-template<class T0, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9>
-__host__ __device__ inline
-  typename detail::make_tuple_mapper<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9>::type
-    make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4, const T5& t5, const T6& t6, const T7& t7, const T8& t8, const T9& t9)
-{
-  typedef typename detail::make_tuple_mapper<T0,T1,T2,T3,T4,T5,T6,T7,T8,T9>::type t;
-  return t(t0,t1,t2,t3,t4,t5,t6,t7,t8,t9);
-} // end make_tuple()
-
-
-template<typename T0>
-__host__ __device__ inline
-tuple<T0&> tie(T0 &t0)
-{
-  return tuple<T0&>(t0);
-}
-
-template<typename T0,typename T1>
-__host__ __device__ inline
-tuple<T0&,T1&> tie(T0 &t0, T1 &t1)
-{
-  return tuple<T0&,T1&>(t0,t1);
-}
-
-template<typename T0,typename T1, typename T2>
-__host__ __device__ inline
-tuple<T0&,T1&,T2&> tie(T0 &t0, T1 &t1, T2 &t2)
-{
-  return tuple<T0&,T1&,T2&>(t0,t1,t2);
-}
-
-template<typename T0,typename T1, typename T2, typename T3>
-__host__ __device__ inline
-tuple<T0&,T1&,T2&,T3&> tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3)
-{
-  return tuple<T0&,T1&,T2&,T3&>(t0,t1,t2,t3);
-}
-
-template<typename T0,typename T1, typename T2, typename T3, typename T4>
-__host__ __device__ inline
-tuple<T0&,T1&,T2&,T3&,T4&> tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3, T4 &t4)
-{
-  return tuple<T0&,T1&,T2&,T3&,T4&>(t0,t1,t2,t3,t4);
-}
-
-template<typename T0,typename T1, typename T2, typename T3, typename T4, typename T5>
-__host__ __device__ inline
-tuple<T0&,T1&,T2&,T3&,T4&,T5&> tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3, T4 &t4, T5 &t5)
-{
-  return tuple<T0&,T1&,T2&,T3&,T4&,T5&>(t0,t1,t2,t3,t4,t5);
-}
-
-template<typename T0,typename T1, typename T2, typename T3, typename T4, typename T5, typename T6>
-__host__ __device__ inline
-tuple<T0&,T1&,T2&,T3&,T4&,T5&,T6&> tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3, T4 &t4, T5 &t5, T6 &t6)
-{
-  return tuple<T0&,T1&,T2&,T3&,T4&,T5&,T6&>(t0,t1,t2,t3,t4,t5,t6);
-}
-
-template<typename T0,typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7>
-__host__ __device__ inline
-tuple<T0&,T1&,T2&,T3&,T4&,T5&,T6&,T7&> tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3, T4 &t4, T5 &t5, T6 &t6, T7 &t7)
-{
-  return tuple<T0&,T1&,T2&,T3&,T4&,T5&,T6&,T7&>(t0,t1,t2,t3,t4,t5,t6,t7);
-}
-
-template<typename T0,typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8>
-__host__ __device__ inline
-tuple<T0&,T1&,T2&,T3&,T4&,T5&,T6&,T7&,T8&> tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3, T4 &t4, T5 &t5, T6 &t6, T7 &t7, T8 &t8)
-{
-  return tuple<T0&,T1&,T2&,T3&,T4&,T5&,T6&,T7&,T8&>(t0,t1,t2,t3,t4,t5,t6,t7,t8);
-}
-
-template<typename T0,typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8, typename T9>
-__host__ __device__ inline
-tuple<T0&,T1&,T2&,T3&,T4&,T5&,T6&,T7&,T8&,T9&> tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3, T4 &t4, T5 &t5, T6 &t6, T7 &t7, T8 &t8, T9 &t9)
-{
-  return tuple<T0&,T1&,T2&,T3&,T4&,T5&,T6&,T7&,T8&,T9&>(t0,t1,t2,t3,t4,t5,t6,t7,t8,t9);
-}
-
-template<
-  typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8, typename T9,
-  typename U0, typename U1, typename U2, typename U3, typename U4, typename U5, typename U6, typename U7, typename U8, typename U9
->
-__host__ __device__ inline
-void swap(thrust::tuple<T0,T1,T2,T3,T4,T5,T6,T7,T8,T9> &x,
-          thrust::tuple<U0,U1,U2,U3,U4,U5,U6,U7,U8,U9> &y)
-{
-  return x.swap(y);
-}
-
-
-
-namespace detail
-{
-
-template<class T1, class T2>
-__host__ __device__
-inline bool eq(const T1& lhs, const T2& rhs) {
-  return lhs.get_head() == rhs.get_head() &&
-         eq(lhs.get_tail(), rhs.get_tail());
-}
-template<>
-inline bool eq<null_type,null_type>(const null_type&, const null_type&) { return true; }
-
-template<class T1, class T2>
-__host__ __device__
-inline bool neq(const T1& lhs, const T2& rhs) {
-  return lhs.get_head() != rhs.get_head()  ||
-         neq(lhs.get_tail(), rhs.get_tail());
-}
-template<>
-__host__ __device__
-inline bool neq<null_type,null_type>(const null_type&, const null_type&) { return false; }
-
-template<class T1, class T2>
-__host__ __device__
-inline bool lt(const T1& lhs, const T2& rhs) {
-  return (lhs.get_head() < rhs.get_head())  ||
-            (!(rhs.get_head() < lhs.get_head()) &&
-             lt(lhs.get_tail(), rhs.get_tail()));
-}
-template<>
-__host__ __device__
-inline bool lt<null_type,null_type>(const null_type&, const null_type&) { return false; }
-
-template<class T1, class T2>
-__host__ __device__
-inline bool gt(const T1& lhs, const T2& rhs) {
-  return (lhs.get_head() > rhs.get_head())  ||
-            (!(rhs.get_head() > lhs.get_head()) &&
-             gt(lhs.get_tail(), rhs.get_tail()));
-}
-template<>
-__host__ __device__
-inline bool gt<null_type,null_type>(const null_type&, const null_type&) { return false; }
-
-template<class T1, class T2>
-__host__ __device__
-inline bool lte(const T1& lhs, const T2& rhs) {
-  return lhs.get_head() <= rhs.get_head()  &&
-          ( !(rhs.get_head() <= lhs.get_head()) ||
-            lte(lhs.get_tail(), rhs.get_tail()));
-}
-template<>
-__host__ __device__
-inline bool lte<null_type,null_type>(const null_type&, const null_type&) { return true; }
-
-template<class T1, class T2>
-__host__ __device__
-inline bool gte(const T1& lhs, const T2& rhs) {
-  return lhs.get_head() >= rhs.get_head()  &&
-          ( !(rhs.get_head() >= lhs.get_head()) ||
-            gte(lhs.get_tail(), rhs.get_tail()));
-}
-template<>
-__host__ __device__
-inline bool gte<null_type,null_type>(const null_type&, const null_type&) { return true; }
-
-} // end detail
-
-
-
-// equal ----
-
-template<class T1, class T2, class S1, class S2>
-__host__ __device__
-inline bool operator==(const detail::cons<T1, T2>& lhs, const detail::cons<S1, S2>& rhs)
-{
-  // XXX support this eventually -jph
-  //// check that tuple lengths are equal
-  //BOOST_STATIC_ASSERT(tuple_size<T2>::value == tuple_size<S2>::value);
-
-  return  detail::eq(lhs, rhs);
-} // end operator==()
-
-// not equal -----
-
-template<class T1, class T2, class S1, class S2>
-__host__ __device__
-inline bool operator!=(const detail::cons<T1, T2>& lhs, const detail::cons<S1, S2>& rhs)
-{
-  // XXX support this eventually -jph
-  //// check that tuple lengths are equal
-  //BOOST_STATIC_ASSERT(tuple_size<T2>::value == tuple_size<S2>::value);
-
-  return detail::neq(lhs, rhs);
-} // end operator!=()
-
-// <
-template<class T1, class T2, class S1, class S2>
-__host__ __device__
-inline bool operator<(const detail::cons<T1, T2>& lhs, const detail::cons<S1, S2>& rhs)
-{
-  // XXX support this eventually -jph
-  //// check that tuple lengths are equal
-  //BOOST_STATIC_ASSERT(tuple_size<T2>::value == tuple_size<S2>::value);
-
-  return detail::lt(lhs, rhs);
-} // end operator<()
-
-// >
-template<class T1, class T2, class S1, class S2>
-__host__ __device__
-inline bool operator>(const detail::cons<T1, T2>& lhs, const detail::cons<S1, S2>& rhs)
-{
-  // XXX support this eventually -jph
-  //// check that tuple lengths are equal
-  //BOOST_STATIC_ASSERT(tuple_size<T2>::value == tuple_size<S2>::value);
-
-  return detail::gt(lhs, rhs);
-} // end operator>()
-
-// <=
-template<class T1, class T2, class S1, class S2>
-__host__ __device__
-inline bool operator<=(const detail::cons<T1, T2>& lhs, const detail::cons<S1, S2>& rhs)
-{
-  // XXX support this eventually -jph
-  //// check that tuple lengths are equal
-  //BOOST_STATIC_ASSERT(tuple_size<T2>::value == tuple_size<S2>::value);
-
-  return detail::lte(lhs, rhs);
-} // end operator<=()
-
-// >=
-template<class T1, class T2, class S1, class S2>
-__host__ __device__
-inline bool operator>=(const detail::cons<T1, T2>& lhs, const detail::cons<S1, S2>& rhs)
-{
-  // XXX support this eventually -jph
-  //// check that tuple lengths are equal
-  //BOOST_STATIC_ASSERT(tuple_size<T2>::value == tuple_size<S2>::value);
-
-  return detail::gte(lhs, rhs);
-} // end operator>=()
-
-} // end thrust
-
diff --git a/compat/thrust/detail/tuple_meta_transform.h b/compat/thrust/detail/tuple_meta_transform.h
deleted file mode 100644
index ff99709b6d..0000000000
--- a/compat/thrust/detail/tuple_meta_transform.h
+++ /dev/null
@@ -1,177 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/tuple.h>
-
-namespace thrust
-{
-
-namespace detail
-{
-
-template<typename Tuple,
-         template<typename> class UnaryMetaFunction,
-         unsigned int sz = thrust::tuple_size<Tuple>::value>
-  struct tuple_meta_transform;
-
-template<typename Tuple,
-         template<typename> class UnaryMetaFunction>
-  struct tuple_meta_transform<Tuple,UnaryMetaFunction,0>
-{
-  typedef null_type type;
-};
-
-template<typename Tuple,
-         template<typename> class UnaryMetaFunction>
-  struct tuple_meta_transform<Tuple,UnaryMetaFunction,1>
-{
-  typedef thrust::tuple<
-    typename UnaryMetaFunction<typename thrust::tuple_element<0,Tuple>::type>::type
-  > type;
-};
-
-template<typename Tuple,
-         template<typename> class UnaryMetaFunction>
-  struct tuple_meta_transform<Tuple,UnaryMetaFunction,2>
-{
-  typedef thrust::tuple<
-    typename UnaryMetaFunction<typename thrust::tuple_element<0,Tuple>::type>::type,
-    typename UnaryMetaFunction<typename thrust::tuple_element<1,Tuple>::type>::type
-  > type;
-};
-
-template<typename Tuple,
-         template<typename> class UnaryMetaFunction>
-  struct tuple_meta_transform<Tuple,UnaryMetaFunction,3>
-{
-  typedef thrust::tuple<
-    typename UnaryMetaFunction<typename thrust::tuple_element<0,Tuple>::type>::type,
-    typename UnaryMetaFunction<typename thrust::tuple_element<1,Tuple>::type>::type,
-    typename UnaryMetaFunction<typename thrust::tuple_element<2,Tuple>::type>::type
-  > type;
-};
-
-template<typename Tuple,
-         template<typename> class UnaryMetaFunction>
-  struct tuple_meta_transform<Tuple,UnaryMetaFunction,4>
-{
-  typedef thrust::tuple<
-    typename UnaryMetaFunction<typename thrust::tuple_element<0,Tuple>::type>::type,
-    typename UnaryMetaFunction<typename thrust::tuple_element<1,Tuple>::type>::type,
-    typename UnaryMetaFunction<typename thrust::tuple_element<2,Tuple>::type>::type,
-    typename UnaryMetaFunction<typename thrust::tuple_element<3,Tuple>::type>::type
-  > type;
-};
-
-template<typename Tuple,
-         template<typename> class UnaryMetaFunction>
-  struct tuple_meta_transform<Tuple,UnaryMetaFunction,5>
-{
-  typedef thrust::tuple<
-    typename UnaryMetaFunction<typename thrust::tuple_element<0,Tuple>::type>::type,
-    typename UnaryMetaFunction<typename thrust::tuple_element<1,Tuple>::type>::type,
-    typename UnaryMetaFunction<typename thrust::tuple_element<2,Tuple>::type>::type,
-    typename UnaryMetaFunction<typename thrust::tuple_element<3,Tuple>::type>::type,
-    typename UnaryMetaFunction<typename thrust::tuple_element<4,Tuple>::type>::type
-  > type;
-};
-
-template<typename Tuple,
-         template<typename> class UnaryMetaFunction>
-  struct tuple_meta_transform<Tuple,UnaryMetaFunction,6>
-{
-  typedef thrust::tuple<
-    typename UnaryMetaFunction<typename thrust::tuple_element<0,Tuple>::type>::type,
-    typename UnaryMetaFunction<typename thrust::tuple_element<1,Tuple>::type>::type,
-    typename UnaryMetaFunction<typename thrust::tuple_element<2,Tuple>::type>::type,
-    typename UnaryMetaFunction<typename thrust::tuple_element<3,Tuple>::type>::type,
-    typename UnaryMetaFunction<typename thrust::tuple_element<4,Tuple>::type>::type,
-    typename UnaryMetaFunction<typename thrust::tuple_element<5,Tuple>::type>::type
-  > type;
-};
-
-template<typename Tuple,
-         template<typename> class UnaryMetaFunction>
-  struct tuple_meta_transform<Tuple,UnaryMetaFunction,7>
-{
-  typedef thrust::tuple<
-    typename UnaryMetaFunction<typename thrust::tuple_element<0,Tuple>::type>::type,
-    typename UnaryMetaFunction<typename thrust::tuple_element<1,Tuple>::type>::type,
-    typename UnaryMetaFunction<typename thrust::tuple_element<2,Tuple>::type>::type,
-    typename UnaryMetaFunction<typename thrust::tuple_element<3,Tuple>::type>::type,
-    typename UnaryMetaFunction<typename thrust::tuple_element<4,Tuple>::type>::type,
-    typename UnaryMetaFunction<typename thrust::tuple_element<5,Tuple>::type>::type,
-    typename UnaryMetaFunction<typename thrust::tuple_element<6,Tuple>::type>::type
-  > type;
-};
-
-template<typename Tuple,
-         template<typename> class UnaryMetaFunction>
-  struct tuple_meta_transform<Tuple,UnaryMetaFunction,8>
-{
-  typedef thrust::tuple<
-    typename UnaryMetaFunction<typename thrust::tuple_element<0,Tuple>::type>::type,
-    typename UnaryMetaFunction<typename thrust::tuple_element<1,Tuple>::type>::type,
-    typename UnaryMetaFunction<typename thrust::tuple_element<2,Tuple>::type>::type,
-    typename UnaryMetaFunction<typename thrust::tuple_element<3,Tuple>::type>::type,
-    typename UnaryMetaFunction<typename thrust::tuple_element<4,Tuple>::type>::type,
-    typename UnaryMetaFunction<typename thrust::tuple_element<5,Tuple>::type>::type,
-    typename UnaryMetaFunction<typename thrust::tuple_element<6,Tuple>::type>::type,
-    typename UnaryMetaFunction<typename thrust::tuple_element<7,Tuple>::type>::type
-  > type;
-};
-
-template<typename Tuple,
-         template<typename> class UnaryMetaFunction>
-  struct tuple_meta_transform<Tuple,UnaryMetaFunction,9>
-{
-  typedef thrust::tuple<
-    typename UnaryMetaFunction<typename thrust::tuple_element<0,Tuple>::type>::type,
-    typename UnaryMetaFunction<typename thrust::tuple_element<1,Tuple>::type>::type,
-    typename UnaryMetaFunction<typename thrust::tuple_element<2,Tuple>::type>::type,
-    typename UnaryMetaFunction<typename thrust::tuple_element<3,Tuple>::type>::type,
-    typename UnaryMetaFunction<typename thrust::tuple_element<4,Tuple>::type>::type,
-    typename UnaryMetaFunction<typename thrust::tuple_element<5,Tuple>::type>::type,
-    typename UnaryMetaFunction<typename thrust::tuple_element<6,Tuple>::type>::type,
-    typename UnaryMetaFunction<typename thrust::tuple_element<7,Tuple>::type>::type,
-    typename UnaryMetaFunction<typename thrust::tuple_element<8,Tuple>::type>::type
-  > type;
-};
-
-template<typename Tuple,
-         template<typename> class UnaryMetaFunction>
-  struct tuple_meta_transform<Tuple,UnaryMetaFunction,10>
-{
-  typedef thrust::tuple<
-    typename UnaryMetaFunction<typename thrust::tuple_element<0,Tuple>::type>::type,
-    typename UnaryMetaFunction<typename thrust::tuple_element<1,Tuple>::type>::type,
-    typename UnaryMetaFunction<typename thrust::tuple_element<2,Tuple>::type>::type,
-    typename UnaryMetaFunction<typename thrust::tuple_element<3,Tuple>::type>::type,
-    typename UnaryMetaFunction<typename thrust::tuple_element<4,Tuple>::type>::type,
-    typename UnaryMetaFunction<typename thrust::tuple_element<5,Tuple>::type>::type,
-    typename UnaryMetaFunction<typename thrust::tuple_element<6,Tuple>::type>::type,
-    typename UnaryMetaFunction<typename thrust::tuple_element<7,Tuple>::type>::type,
-    typename UnaryMetaFunction<typename thrust::tuple_element<8,Tuple>::type>::type,
-    typename UnaryMetaFunction<typename thrust::tuple_element<9,Tuple>::type>::type
-  > type;
-};
-
-} // end detail
-
-} // end thrust
-
diff --git a/compat/thrust/detail/tuple_transform.h b/compat/thrust/detail/tuple_transform.h
deleted file mode 100644
index f18b8727e1..0000000000
--- a/compat/thrust/detail/tuple_transform.h
+++ /dev/null
@@ -1,418 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/tuple.h>
-#include <thrust/detail/tuple_meta_transform.h>
-
-namespace thrust
-{
-
-namespace detail
-{
-
-template<typename Tuple,
-         template<typename> class UnaryMetaFunction,
-         typename UnaryFunction,
-         unsigned int sz = thrust::tuple_size<Tuple>::value>
-  struct tuple_transform_functor;
-
-
-template<typename Tuple,
-         template<typename> class UnaryMetaFunction,
-         typename UnaryFunction>
-  struct tuple_transform_functor<Tuple,UnaryMetaFunction,UnaryFunction,0>
-{
-  static __host__
-  typename tuple_meta_transform<Tuple,UnaryMetaFunction>::type
-  do_it_on_the_host(const Tuple &t, UnaryFunction f)
-  {
-    return thrust::null_type();
-  }
-
-  static __host__ __device__
-  typename tuple_meta_transform<Tuple,UnaryMetaFunction>::type
-  do_it_on_the_host_or_device(const Tuple &t, UnaryFunction f)
-  {
-    return thrust::null_type();
-  }
-};
-
-
-template<typename Tuple,
-         template<typename> class UnaryMetaFunction,
-         typename UnaryFunction>
-  struct tuple_transform_functor<Tuple,UnaryMetaFunction,UnaryFunction,1>
-{
-  static __host__
-  typename tuple_meta_transform<Tuple,UnaryMetaFunction>::type
-  do_it_on_the_host(const Tuple &t, UnaryFunction f)
-  {
-    typedef typename tuple_meta_transform<Tuple,UnaryMetaFunction>::type XfrmTuple;
-
-    return XfrmTuple(f(thrust::get<0>(t)));
-  }
-
-  static __host__ __device__
-  typename tuple_meta_transform<Tuple,UnaryMetaFunction>::type
-  do_it_on_the_host_or_device(const Tuple &t, UnaryFunction f)
-  {
-    typedef typename tuple_meta_transform<Tuple,UnaryMetaFunction>::type XfrmTuple;
-
-    return XfrmTuple(f(thrust::get<0>(t)));
-  }
-};
-
-
-template<typename Tuple,
-         template<typename> class UnaryMetaFunction,
-         typename UnaryFunction>
-  struct tuple_transform_functor<Tuple,UnaryMetaFunction,UnaryFunction,2>
-{
-  static __host__
-  typename tuple_meta_transform<Tuple,UnaryMetaFunction>::type
-  do_it_on_the_host(const Tuple &t, UnaryFunction f)
-  {
-    typedef typename tuple_meta_transform<Tuple,UnaryMetaFunction>::type XfrmTuple;
-
-    return XfrmTuple(f(thrust::get<0>(t)),
-                     f(thrust::get<1>(t)));
-  }
-
-  static __host__ __device__
-  typename tuple_meta_transform<Tuple,UnaryMetaFunction>::type
-  do_it_on_the_host_or_device(const Tuple &t, UnaryFunction f)
-  {
-    typedef typename tuple_meta_transform<Tuple,UnaryMetaFunction>::type XfrmTuple;
-
-    return XfrmTuple(f(thrust::get<0>(t)),
-                     f(thrust::get<1>(t)));
-  }
-};
-
-
-template<typename Tuple,
-         template<typename> class UnaryMetaFunction,
-         typename UnaryFunction>
-  struct tuple_transform_functor<Tuple,UnaryMetaFunction,UnaryFunction,3>
-{
-  static __host__
-  typename tuple_meta_transform<Tuple,UnaryMetaFunction>::type
-  do_it_on_the_host(const Tuple &t, UnaryFunction f)
-  {
-    typedef typename tuple_meta_transform<Tuple,UnaryMetaFunction>::type XfrmTuple;
-
-    return XfrmTuple(f(thrust::get<0>(t)),
-                     f(thrust::get<1>(t)),
-                     f(thrust::get<2>(t)));
-  }
-
-  static __host__ __device__
-  typename tuple_meta_transform<Tuple,UnaryMetaFunction>::type
-  do_it_on_the_host_or_device(const Tuple &t, UnaryFunction f)
-  {
-    typedef typename tuple_meta_transform<Tuple,UnaryMetaFunction>::type XfrmTuple;
-
-    return XfrmTuple(f(thrust::get<0>(t)),
-                     f(thrust::get<1>(t)),
-                     f(thrust::get<2>(t)));
-  }
-};
-
-
-template<typename Tuple,
-         template<typename> class UnaryMetaFunction,
-         typename UnaryFunction>
-  struct tuple_transform_functor<Tuple,UnaryMetaFunction,UnaryFunction,4>
-{
-  static __host__
-  typename tuple_meta_transform<Tuple,UnaryMetaFunction>::type
-  do_it_on_the_host(const Tuple &t, UnaryFunction f)
-  {
-    typedef typename tuple_meta_transform<Tuple,UnaryMetaFunction>::type XfrmTuple;
-
-    return XfrmTuple(f(thrust::get<0>(t)),
-                     f(thrust::get<1>(t)),
-                     f(thrust::get<2>(t)),
-                     f(thrust::get<3>(t)));
-  }
-
-  static __host__ __device__
-  typename tuple_meta_transform<Tuple,UnaryMetaFunction>::type
-  do_it_on_the_host_or_device(const Tuple &t, UnaryFunction f)
-  {
-    typedef typename tuple_meta_transform<Tuple,UnaryMetaFunction>::type XfrmTuple;
-
-    return XfrmTuple(f(thrust::get<0>(t)),
-                     f(thrust::get<1>(t)),
-                     f(thrust::get<2>(t)),
-                     f(thrust::get<3>(t)));
-  }
-};
-
-
-template<typename Tuple,
-         template<typename> class UnaryMetaFunction,
-         typename UnaryFunction>
-  struct tuple_transform_functor<Tuple,UnaryMetaFunction,UnaryFunction,5>
-{
-  static __host__
-  typename tuple_meta_transform<Tuple,UnaryMetaFunction>::type
-  do_it_on_the_host(const Tuple &t, UnaryFunction f)
-  {
-    typedef typename tuple_meta_transform<Tuple,UnaryMetaFunction>::type XfrmTuple;
-
-    return XfrmTuple(f(thrust::get<0>(t)),
-                     f(thrust::get<1>(t)),
-                     f(thrust::get<2>(t)),
-                     f(thrust::get<3>(t)),
-                     f(thrust::get<4>(t)));
-  }
-
-  static __host__ __device__
-  typename tuple_meta_transform<Tuple,UnaryMetaFunction>::type
-  do_it_on_the_host_or_device(const Tuple &t, UnaryFunction f)
-  {
-    typedef typename tuple_meta_transform<Tuple,UnaryMetaFunction>::type XfrmTuple;
-
-    return XfrmTuple(f(thrust::get<0>(t)),
-                     f(thrust::get<1>(t)),
-                     f(thrust::get<2>(t)),
-                     f(thrust::get<3>(t)),
-                     f(thrust::get<4>(t)));
-  }
-};
-
-
-template<typename Tuple,
-         template<typename> class UnaryMetaFunction,
-         typename UnaryFunction>
-  struct tuple_transform_functor<Tuple,UnaryMetaFunction,UnaryFunction,6>
-{
-  static __host__
-  typename tuple_meta_transform<Tuple,UnaryMetaFunction>::type
-  do_it_on_the_host(const Tuple &t, UnaryFunction f)
-  {
-    typedef typename tuple_meta_transform<Tuple,UnaryMetaFunction>::type XfrmTuple;
-
-    return XfrmTuple(f(thrust::get<0>(t)),
-                     f(thrust::get<1>(t)),
-                     f(thrust::get<2>(t)),
-                     f(thrust::get<3>(t)),
-                     f(thrust::get<4>(t)),
-                     f(thrust::get<5>(t)));
-  }
-
-  static __host__ __device__
-  typename tuple_meta_transform<Tuple,UnaryMetaFunction>::type
-  do_it_on_the_host_or_device(const Tuple &t, UnaryFunction f)
-  {
-    typedef typename tuple_meta_transform<Tuple,UnaryMetaFunction>::type XfrmTuple;
-
-    return XfrmTuple(f(thrust::get<0>(t)),
-                     f(thrust::get<1>(t)),
-                     f(thrust::get<2>(t)),
-                     f(thrust::get<3>(t)),
-                     f(thrust::get<4>(t)),
-                     f(thrust::get<5>(t)));
-  }
-};
-
-
-template<typename Tuple,
-         template<typename> class UnaryMetaFunction,
-         typename UnaryFunction>
-  struct tuple_transform_functor<Tuple,UnaryMetaFunction,UnaryFunction,7>
-{
-  static __host__
-  typename tuple_meta_transform<Tuple,UnaryMetaFunction>::type
-  do_it_on_the_host(const Tuple &t, UnaryFunction f)
-  {
-    typedef typename tuple_meta_transform<Tuple,UnaryMetaFunction>::type XfrmTuple;
-
-    return XfrmTuple(f(thrust::get<0>(t)),
-                     f(thrust::get<1>(t)),
-                     f(thrust::get<2>(t)),
-                     f(thrust::get<3>(t)),
-                     f(thrust::get<4>(t)),
-                     f(thrust::get<5>(t)),
-                     f(thrust::get<6>(t)));
-  }
-
-  static __host__ __device__
-  typename tuple_meta_transform<Tuple,UnaryMetaFunction>::type
-  do_it_on_the_host_or_device(const Tuple &t, UnaryFunction f)
-  {
-    typedef typename tuple_meta_transform<Tuple,UnaryMetaFunction>::type XfrmTuple;
-
-    return XfrmTuple(f(thrust::get<0>(t)),
-                     f(thrust::get<1>(t)),
-                     f(thrust::get<2>(t)),
-                     f(thrust::get<3>(t)),
-                     f(thrust::get<4>(t)),
-                     f(thrust::get<5>(t)),
-                     f(thrust::get<6>(t)));
-  }
-};
-
-
-template<typename Tuple,
-         template<typename> class UnaryMetaFunction,
-         typename UnaryFunction>
-  struct tuple_transform_functor<Tuple,UnaryMetaFunction,UnaryFunction,8>
-{
-  static __host__
-  typename tuple_meta_transform<Tuple,UnaryMetaFunction>::type
-  do_it_on_the_host(const Tuple &t, UnaryFunction f)
-  {
-    typedef typename tuple_meta_transform<Tuple,UnaryMetaFunction>::type XfrmTuple;
-
-    return XfrmTuple(f(thrust::get<0>(t)),
-                     f(thrust::get<1>(t)),
-                     f(thrust::get<2>(t)),
-                     f(thrust::get<3>(t)),
-                     f(thrust::get<4>(t)),
-                     f(thrust::get<5>(t)),
-                     f(thrust::get<6>(t)),
-                     f(thrust::get<7>(t)));
-  }
-
-  static __host__ __device__
-  typename tuple_meta_transform<Tuple,UnaryMetaFunction>::type
-  do_it_on_the_host_or_device(const Tuple &t, UnaryFunction f)
-  {
-    typedef typename tuple_meta_transform<Tuple,UnaryMetaFunction>::type XfrmTuple;
-
-    return XfrmTuple(f(thrust::get<0>(t)),
-                     f(thrust::get<1>(t)),
-                     f(thrust::get<2>(t)),
-                     f(thrust::get<3>(t)),
-                     f(thrust::get<4>(t)),
-                     f(thrust::get<5>(t)),
-                     f(thrust::get<6>(t)),
-                     f(thrust::get<7>(t)));
-  }
-};
-
-
-template<typename Tuple,
-         template<typename> class UnaryMetaFunction,
-         typename UnaryFunction>
-  struct tuple_transform_functor<Tuple,UnaryMetaFunction,UnaryFunction,9>
-{
-  static __host__
-  typename tuple_meta_transform<Tuple,UnaryMetaFunction>::type
-  do_it_on_the_host(const Tuple &t, UnaryFunction f)
-  {
-    typedef typename tuple_meta_transform<Tuple,UnaryMetaFunction>::type XfrmTuple;
-
-    return XfrmTuple(f(thrust::get<0>(t)),
-                     f(thrust::get<1>(t)),
-                     f(thrust::get<2>(t)),
-                     f(thrust::get<3>(t)),
-                     f(thrust::get<4>(t)),
-                     f(thrust::get<5>(t)),
-                     f(thrust::get<6>(t)),
-                     f(thrust::get<7>(t)),
-                     f(thrust::get<8>(t)));
-  }
-
-  static __host__ __device__
-  typename tuple_meta_transform<Tuple,UnaryMetaFunction>::type
-  do_it_on_the_host_or_device(const Tuple &t, UnaryFunction f)
-  {
-    typedef typename tuple_meta_transform<Tuple,UnaryMetaFunction>::type XfrmTuple;
-
-    return XfrmTuple(f(thrust::get<0>(t)),
-                     f(thrust::get<1>(t)),
-                     f(thrust::get<2>(t)),
-                     f(thrust::get<3>(t)),
-                     f(thrust::get<4>(t)),
-                     f(thrust::get<5>(t)),
-                     f(thrust::get<6>(t)),
-                     f(thrust::get<7>(t)),
-                     f(thrust::get<8>(t)));
-  }
-};
-
-
-template<typename Tuple,
-         template<typename> class UnaryMetaFunction,
-         typename UnaryFunction>
-  struct tuple_transform_functor<Tuple,UnaryMetaFunction,UnaryFunction,10>
-{
-  static __host__
-  typename tuple_meta_transform<Tuple,UnaryMetaFunction>::type
-  do_it_on_the_host(const Tuple &t, UnaryFunction f)
-  {
-    typedef typename tuple_meta_transform<Tuple,UnaryMetaFunction>::type XfrmTuple;
-
-    return XfrmTuple(f(thrust::get<0>(t)),
-                     f(thrust::get<1>(t)),
-                     f(thrust::get<2>(t)),
-                     f(thrust::get<3>(t)),
-                     f(thrust::get<4>(t)),
-                     f(thrust::get<5>(t)),
-                     f(thrust::get<6>(t)),
-                     f(thrust::get<7>(t)),
-                     f(thrust::get<8>(t)),
-                     f(thrust::get<9>(t)));
-  }
-
-  static __host__ __device__
-  typename tuple_meta_transform<Tuple,UnaryMetaFunction>::type
-  do_it_on_the_host_or_device(const Tuple &t, UnaryFunction f)
-  {
-    typedef typename tuple_meta_transform<Tuple,UnaryMetaFunction>::type XfrmTuple;
-
-    return XfrmTuple(f(thrust::get<0>(t)),
-                     f(thrust::get<1>(t)),
-                     f(thrust::get<2>(t)),
-                     f(thrust::get<3>(t)),
-                     f(thrust::get<4>(t)),
-                     f(thrust::get<5>(t)),
-                     f(thrust::get<6>(t)),
-                     f(thrust::get<7>(t)),
-                     f(thrust::get<8>(t)),
-                     f(thrust::get<9>(t)));
-  }
-};
-
-
-template<template<typename> class UnaryMetaFunction,
-         typename Tuple,
-         typename UnaryFunction>
-typename tuple_meta_transform<Tuple,UnaryMetaFunction>::type
-tuple_host_transform(const Tuple &t, UnaryFunction f)
-{
-  return tuple_transform_functor<Tuple,UnaryMetaFunction,UnaryFunction>::do_it_on_the_host(t,f);
-}
-
-template<template<typename> class UnaryMetaFunction,
-         typename Tuple,
-         typename UnaryFunction>
-typename tuple_meta_transform<Tuple,UnaryMetaFunction>::type
-__host__ __device__
-tuple_host_device_transform(const Tuple &t, UnaryFunction f)
-{
-  return tuple_transform_functor<Tuple,UnaryMetaFunction,UnaryFunction>::do_it_on_the_host_or_device(t,f);
-}
-
-} // end detail
-
-} // end thrust
-
diff --git a/compat/thrust/detail/type_traits.h b/compat/thrust/detail/type_traits.h
deleted file mode 100644
index 5dbeb906eb..0000000000
--- a/compat/thrust/detail/type_traits.h
+++ /dev/null
@@ -1,641 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file type_traits.h
- *  \brief Temporarily define some type traits
- *         until nvcc can compile tr1::type_traits.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// XXX nvcc 2.2 closed beta can't compile type_traits
-//// find type_traits
-//
-//#ifdef __GNUC__
-//
-//#if __GNUC__ == 4 && __GNUC_MINOR__ == 2
-//#include <tr1/type_traits>
-//#elif __GNUC__ == 4 && __GNUC_MINOR__ > 2
-//#include <type_traits>
-//#endif // GCC version
-//
-//#endif // GCC
-//
-//#ifdef _MSC_VER
-//#include <type_traits>
-//#endif // MSVC
-
-
-namespace thrust
-{
-
-// forward declaration of device_reference
-template<typename T> class device_reference;
-
-namespace detail
-{
- /// helper classes [4.3].
- template<typename _Tp, _Tp __v>
-   struct integral_constant
-   {
-     static const _Tp                      value = __v;
-     typedef _Tp                           value_type;
-     typedef integral_constant<_Tp, __v>   type;
-   };
- 
- /// typedef for true_type
- typedef integral_constant<bool, true>     true_type;
-
- /// typedef for true_type
- typedef integral_constant<bool, false>    false_type;
-
-//template<typename T> struct is_integral : public std::tr1::is_integral<T> {};
-template<typename T> struct is_integral                           : public false_type {};
-template<>           struct is_integral<bool>                     : public true_type {};
-template<>           struct is_integral<char>                     : public true_type {};
-template<>           struct is_integral<signed char>              : public true_type {};
-template<>           struct is_integral<unsigned char>            : public true_type {};
-template<>           struct is_integral<short>                    : public true_type {};
-template<>           struct is_integral<unsigned short>           : public true_type {};
-template<>           struct is_integral<int>                      : public true_type {};
-template<>           struct is_integral<unsigned int>             : public true_type {};
-template<>           struct is_integral<long>                     : public true_type {};
-template<>           struct is_integral<unsigned long>            : public true_type {};
-template<>           struct is_integral<long long>                : public true_type {};
-template<>           struct is_integral<unsigned long long>       : public true_type {};
-template<>           struct is_integral<const bool>               : public true_type {};
-template<>           struct is_integral<const char>               : public true_type {};
-template<>           struct is_integral<const unsigned char>      : public true_type {};
-template<>           struct is_integral<const short>              : public true_type {};
-template<>           struct is_integral<const unsigned short>     : public true_type {};
-template<>           struct is_integral<const int>                : public true_type {};
-template<>           struct is_integral<const unsigned int>       : public true_type {};
-template<>           struct is_integral<const long>               : public true_type {};
-template<>           struct is_integral<const unsigned long>      : public true_type {};
-template<>           struct is_integral<const long long>          : public true_type {};
-template<>           struct is_integral<const unsigned long long> : public true_type {};
-
-template<typename T> struct is_floating_point              : public false_type {};
-template<>           struct is_floating_point<float>       : public true_type {};
-template<>           struct is_floating_point<double>      : public true_type {};
-template<>           struct is_floating_point<long double> : public true_type {};
-
-template<typename T> struct is_arithmetic               : public is_integral<T> {};
-template<>           struct is_arithmetic<float>        : public true_type {};
-template<>           struct is_arithmetic<double>       : public true_type {};
-template<>           struct is_arithmetic<const float>  : public true_type {};
-template<>           struct is_arithmetic<const double> : public true_type {};
-
-template<typename T> struct is_pointer      : public false_type {};
-template<typename T> struct is_pointer<T *> : public true_type  {};
-
-template<typename T> struct is_device_ptr  : public false_type {};
-
-template<typename T> struct is_void             : public false_type {};
-template<>           struct is_void<void>       : public true_type {};
-template<>           struct is_void<const void> : public true_type {};
-
-
-namespace tt_detail
-{
-
-
-} // end tt_detail
-
-template<typename T> struct is_pod
-   : public integral_constant<
-       bool,
-       is_void<T>::value || is_pointer<T>::value || is_arithmetic<T>::value
-#if THRUST_HOST_COMPILER   == THRUST_HOST_COMPILER_MSVC
-// use intrinsic type traits
-       || __is_pod(T)
-#elif THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_GCC
-// only use the intrinsic for >= 4.3
-#if (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 3)
-       || __is_pod(T)
-#endif // GCC VERSION
-#endif // THRUST_HOST_COMPILER
-     >
- {};
-
-
-template<typename T> struct has_trivial_constructor
-  : public integral_constant<
-      bool,
-      is_pod<T>::value
-#if THRUST_HOST_COMPILER   == THRUST_HOST_COMPILER_MSVC
-      || __has_trivial_constructor(T)
-#elif THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_GCC
-// only use the intrinsic for >= 4.3
-#if (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 3)
-      || __has_trivial_constructor(T)
-#endif // GCC VERSION
-#endif // THRUST_HOST_COMPILER
-      >
-{};
-
-template<typename T> struct has_trivial_copy_constructor
-  : public integral_constant<
-      bool,
-      is_pod<T>::value
-#if THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC
-      || __has_trivial_copy(T)
-#elif THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_GCC
-// only use the intrinsic for >= 4.3
-#if (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 3)
-      || __has_trivial_copy(T)
-#endif // GCC VERSION
-#endif // THRUST_HOST_COMPILER
-    >
-{};
-
-template<typename T> struct has_trivial_destructor : public is_pod<T> {};
-
-template<typename T> struct is_const          : public false_type {};
-template<typename T> struct is_const<const T> : public true_type {};
-
-template<typename T> struct is_volatile             : public false_type {};
-template<typename T> struct is_volatile<volatile T> : public true_type {};
-
-template<typename T>
-  struct add_const
-{
-  typedef T const type;
-}; // end add_const
-
-template<typename T>
-  struct remove_const
-{
-  typedef T type;
-}; // end remove_const
-
-template<typename T>
-  struct remove_const<const T>
-{
-  typedef T type;
-}; // end remove_const
-
-template<typename T>
-  struct add_volatile
-{
-  typedef volatile T type;
-}; // end add_volatile
-
-template<typename T>
-  struct remove_volatile
-{
-  typedef T type;
-}; // end remove_volatile
-
-template<typename T>
-  struct remove_volatile<volatile T>
-{
-  typedef T type;
-}; // end remove_volatile
-
-template<typename T>
-  struct add_cv
-{
-  typedef const volatile T type;
-}; // end add_cv
-
-template<typename T>
-  struct remove_cv
-{
-  typedef typename remove_const<typename remove_volatile<T>::type>::type type;
-}; // end remove_cv
-
-
-template<typename T> struct is_reference     : public false_type {};
-template<typename T> struct is_reference<T&> : public true_type {};
-
-template<typename T> struct is_device_reference                                : public false_type {};
-template<typename T> struct is_device_reference< thrust::device_reference<T> > : public true_type {};
-
-
-// NB: Careful with reference to void.
-template<typename _Tp, bool = (is_void<_Tp>::value || is_reference<_Tp>::value)>
-  struct __add_reference_helper
-  { typedef _Tp&    type; };
-
-template<typename _Tp>
-  struct __add_reference_helper<_Tp, true>
-  { typedef _Tp     type; };
-
-template<typename _Tp>
-  struct add_reference
-    : public __add_reference_helper<_Tp>{};
-
-template<typename T>
-  struct remove_reference
-{
-  typedef T type;
-}; // end remove_reference
-
-template<typename T>
-  struct remove_reference<T&>
-{
-  typedef T type;
-}; // end remove_reference
-
-template<typename T1, typename T2>
-  struct is_same
-    : public false_type
-{
-}; // end is_same
-
-template<typename T>
-  struct is_same<T,T>
-    : public true_type
-{
-}; // end is_same
-
-template<typename T1, typename T2>
-  struct lazy_is_same
-    : is_same<typename T1::type, typename T2::type>
-{
-}; // end lazy_is_same
-
-template<typename T1, typename T2>
-  struct is_different
-    : public true_type
-{
-}; // end is_different
-
-template<typename T>
-  struct is_different<T,T>
-    : public false_type
-{
-}; // end is_different
-
-template<typename T1, typename T2>
-  struct lazy_is_different
-    : is_different<typename T1::type, typename T2::type>
-{
-}; // end lazy_is_different
-
-namespace tt_detail
-{
-
-template<typename T>
-  struct is_int_or_cref
-{
-  typedef typename remove_reference<T>::type type_sans_ref;
-  static const bool value = (is_integral<T>::value
-                             || (is_integral<type_sans_ref>::value
-                                 && is_const<type_sans_ref>::value
-                                 && !is_volatile<type_sans_ref>::value));
-}; // end is_int_or_cref
-
-
-__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_BEGIN
-__THRUST_DISABLE_MSVC_FORCING_VALUE_TO_BOOL_BEGIN
-
-
-template<typename From, typename To>
-  struct is_convertible_sfinae
-{
-  private:
-    typedef char                          one_byte;
-    typedef struct { char two_chars[2]; } two_bytes;
-
-    static one_byte  test(To);
-    static two_bytes test(...);
-    static From      m_from;
-
-  public:
-    static const bool value = sizeof(test(m_from)) == sizeof(one_byte);
-}; // end is_convertible_sfinae
-
-
-__THRUST_DISABLE_MSVC_FORCING_VALUE_TO_BOOL_END
-__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_END
-
-
-template<typename From, typename To>
-  struct is_convertible_needs_simple_test
-{
-  static const bool from_is_void      = is_void<From>::value;
-  static const bool to_is_void        = is_void<To>::value;
-  static const bool from_is_float     = is_floating_point<typename remove_reference<From>::type>::value;
-  static const bool to_is_int_or_cref = is_int_or_cref<To>::value;
-
-  static const bool value = (from_is_void || to_is_void || (from_is_float && to_is_int_or_cref));
-}; // end is_convertible_needs_simple_test
-
-
-template<typename From, typename To,
-         bool = is_convertible_needs_simple_test<From,To>::value>
-  struct is_convertible
-{
-  static const bool value = (is_void<To>::value
-                             || (is_int_or_cref<To>::value
-                                 && !is_void<From>::value));
-}; // end is_convertible
-
-
-template<typename From, typename To>
-  struct is_convertible<From, To, false>
-{
-  static const bool value = (is_convertible_sfinae<typename
-                             add_reference<From>::type, To>::value);
-}; // end is_convertible
-
-
-} // end tt_detail
-
-template<typename From, typename To>
-  struct is_convertible
-    : public integral_constant<bool, tt_detail::is_convertible<From, To>::value>
-{
-}; // end is_convertible
-
-
-template<typename T1, typename T2>
-  struct is_one_convertible_to_the_other
-    : public integral_constant<
-        bool,
-        is_convertible<T1,T2>::value || is_convertible<T2,T1>::value
-      >
-{};
-
-
-// mpl stuff
-
-template <typename Condition1,               typename Condition2,              typename Condition3 = false_type,
-          typename Condition4  = false_type, typename Condition5 = false_type, typename Condition6 = false_type,
-          typename Condition7  = false_type, typename Condition8 = false_type, typename Condition9 = false_type,
-          typename Condition10 = false_type>
-  struct or_
-    : public integral_constant<
-        bool,
-        Condition1::value || Condition2::value || Condition3::value || Condition4::value || Condition5::value || Condition6::value || Condition7::value || Condition8::value || Condition9::value || Condition10::value
-      >
-{
-}; // end or_
-
-template <typename Condition1, typename Condition2, typename Condition3 = true_type>
-  struct and_
-    : public integral_constant<bool, Condition1::value && Condition2::value && Condition3::value>
-{
-}; // end and_
-
-template <typename Boolean>
-  struct not_
-    : public integral_constant<bool, !Boolean::value>
-{
-}; // end not_
-
-template <bool, typename Then, typename Else>
-  struct eval_if
-{
-}; // end eval_if
-
-template<typename Then, typename Else>
-  struct eval_if<true, Then, Else>
-{
-  typedef typename Then::type type;
-}; // end eval_if
-
-template<typename Then, typename Else>
-  struct eval_if<false, Then, Else>
-{
-  typedef typename Else::type type;
-}; // end eval_if
-
-template<typename T>
-//  struct identity
-//  XXX WAR nvcc's confusion with thrust::identity
-  struct identity_
-{
-  typedef T type;
-}; // end identity
-
-template<bool, typename T = void> struct enable_if {};
-template<typename T>              struct enable_if<true, T> {typedef T type;};
-
-template<bool, typename T> struct lazy_enable_if {};
-template<typename T>       struct lazy_enable_if<true, T> {typedef typename T::type type;};
-
-template<bool condition, typename T = void> struct disable_if : enable_if<!condition, T> {};
-template<bool condition, typename T>        struct lazy_disable_if : lazy_enable_if<!condition, T> {};
-
-
-template<typename T1, typename T2, typename T = void>
-  struct enable_if_convertible
-    : enable_if< is_convertible<T1,T2>::value, T >
-{};
-
-
-template<typename T1, typename T2, typename T = void>
-  struct disable_if_convertible
-    : disable_if< is_convertible<T1,T2>::value, T >
-{};
-
-
-template<typename T1, typename T2, typename Result = void>
-  struct enable_if_different
-    : enable_if<is_different<T1,T2>::value, Result>
-{};
-
-
-template<typename T>
-  struct is_numeric
-    : and_<
-        is_convertible<int,T>,
-        is_convertible<T,int>
-      >
-{
-}; // end is_numeric
-
-
-template<typename> struct is_reference_to_const             : false_type {};
-template<typename T> struct is_reference_to_const<const T&> : true_type {};
-
-
-// make_unsigned follows
-
-namespace tt_detail
-{
-
-template<typename T> struct make_unsigned_simple;
-
-template<> struct make_unsigned_simple<char>                   { typedef unsigned char          type; };
-template<> struct make_unsigned_simple<signed char>            { typedef signed   char          type; };
-template<> struct make_unsigned_simple<unsigned char>          { typedef unsigned char          type; };
-template<> struct make_unsigned_simple<short>                  { typedef unsigned short         type; };
-template<> struct make_unsigned_simple<unsigned short>         { typedef unsigned short         type; };
-template<> struct make_unsigned_simple<int>                    { typedef unsigned int           type; };
-template<> struct make_unsigned_simple<unsigned int>           { typedef unsigned int           type; };
-template<> struct make_unsigned_simple<long int>               { typedef unsigned long int      type; };
-template<> struct make_unsigned_simple<unsigned long int>      { typedef unsigned long int      type; };
-template<> struct make_unsigned_simple<long long int>          { typedef unsigned long long int type; };
-template<> struct make_unsigned_simple<unsigned long long int> { typedef unsigned long long int type; };
-
-template<typename T>
-  struct make_unsigned_base
-{
-  // remove cv
-  typedef typename remove_cv<T>::type remove_cv_t;
-
-  // get the simple unsigned type
-  typedef typename make_unsigned_simple<remove_cv_t>::type unsigned_remove_cv_t;
-
-  // add back const, volatile, both, or neither to the simple result
-  typedef typename eval_if<
-    is_const<T>::value && is_volatile<T>::value,
-    // add cv back
-    add_cv<unsigned_remove_cv_t>,
-    // check const & volatile individually
-    eval_if<
-      is_const<T>::value,
-      // add c back
-      add_const<unsigned_remove_cv_t>,
-      eval_if<
-        is_volatile<T>::value,
-        // add v back
-        add_volatile<unsigned_remove_cv_t>,
-        // original type was neither cv, return the simple unsigned result
-        identity_<unsigned_remove_cv_t>
-      >
-    >
-  >::type type;
-};
-
-} // end tt_detail
-
-template<typename T>
-  struct make_unsigned
-    : tt_detail::make_unsigned_base<T>
-{};
-
-struct largest_available_float
-{
-#if defined(__CUDA_ARCH__)
-#  if (__CUDA_ARCH__ < 130)
-  typedef float type;
-#  else
-  typedef double type;
-#  endif
-#else
-  typedef double type;
-#endif
-};
-
-// T1 wins if they are both the same size
-template<typename T1, typename T2>
-  struct larger_type
-    : thrust::detail::eval_if<
-        (sizeof(T2) > sizeof(T1)),
-        thrust::detail::identity_<T2>,
-        thrust::detail::identity_<T1>
-      >
-{};
-
-
-namespace is_base_of_ns
-{
-
-typedef char                          yes;
-typedef struct { char two_chars[2]; } no;
-
-template<typename Base, typename Derived>
-  struct host
-{
-  operator Base*() const;
-  operator Derived*();
-}; // end host
-
-template<typename Base, typename Derived>
-  struct impl
-{
-  template<typename T> static yes check(Derived *, T);
-  static no check(Base*, int);
-
-  static const bool value = sizeof(check(host<Base,Derived>(), int())) == sizeof(yes);
-}; // end impl
-
-} // end is_base_of_ns
-
-
-template<typename Base, typename Derived>
-  struct is_base_of
-    : integral_constant<
-        bool,
-        is_base_of_ns::impl<Base,Derived>::value
-      >
-{};
-
-template<typename Base, typename Derived, typename Result = void>
-  struct enable_if_base_of
-    : enable_if<
-        is_base_of<Base,Derived>::value,
-        Result
-      >
-{};
-
-
-namespace is_assignable_ns
-{
-
-template<typename T1, typename T2>
-  class is_assignable
-{
-  typedef char                      yes_type;
-  typedef struct { char array[2]; } no_type;
-
-  template<typename T> static typename add_reference<T>::type declval();
-  
-  template<unsigned int> struct helper { typedef void * type; };
-
-  template<typename U1, typename U2> static yes_type test(typename helper<sizeof(declval<U1>() = declval<U2>())>::type);
-
-  template<typename,typename> static no_type test(...);
-
-  public:
-    static const bool value = sizeof(test<T1,T2>(0)) == 1;
-}; // end is_assignable
-
-} // end is_assignable_ns
-
-
-template<typename T1, typename T2>
-  struct is_assignable
-    : integral_constant<
-        bool,
-        is_assignable_ns::is_assignable<T1,T2>::value
-      >
-{};
-
-
-template<typename T>
-  struct is_copy_assignable
-    : is_assignable<
-        typename add_reference<T>::type,
-        typename add_reference<typename add_const<T>::type>::type
-      >
-{};
-
-
-} // end detail
-
-} // end thrust
-
-#include <thrust/detail/type_traits/has_trivial_assign.h>
-
diff --git a/compat/thrust/detail/type_traits/algorithm/intermediate_type_from_function_and_iterators.h b/compat/thrust/detail/type_traits/algorithm/intermediate_type_from_function_and_iterators.h
deleted file mode 100644
index 92767b5497..0000000000
--- a/compat/thrust/detail/type_traits/algorithm/intermediate_type_from_function_and_iterators.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/detail/type_traits/function_traits.h>
-#include <thrust/detail/type_traits/iterator/is_output_iterator.h>
-
-namespace thrust
-{
-
-namespace detail
-{
-
-// this trait reports what type should be used as a temporary in certain algorithms
-// which aggregate intermediate results from a function before writing to an output iterator
-
-// the pseudocode for deducing the type of the temporary used below:
-// 
-// if Function is an AdaptableFunction
-//   result = Function::result_type
-// else if OutputIterator2 is a "pure" output iterator
-//   result = InputIterator2::value_type
-// else
-//   result = OutputIterator2::value_type
-//
-// XXX upon c++0x, TemporaryType needs to be:
-// result_of<BinaryFunction>::type
-template<typename InputIterator, typename OutputIterator, typename Function>
-  struct intermediate_type_from_function_and_iterators
-    : eval_if<
-        has_result_type<Function>::value,
-        result_type<Function>,
-        eval_if<
-          is_output_iterator<OutputIterator>::value,
-          thrust::iterator_value<InputIterator>,
-          thrust::iterator_value<OutputIterator>
-        >
-      >
-{
-}; // end intermediate_type_from_function_and_iterators
-
-} // end detail
-
-} // end thrust
-
diff --git a/compat/thrust/detail/type_traits/function_traits.h b/compat/thrust/detail/type_traits/function_traits.h
deleted file mode 100644
index 39015c608d..0000000000
--- a/compat/thrust/detail/type_traits/function_traits.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/type_traits.h>
-#include <thrust/detail/type_traits/has_nested_type.h>
-
-namespace thrust
-{
-
-// forward definitions for is_commutative
-template <typename T> struct plus;
-template <typename T> struct multiplies;
-template <typename T> struct minimum;
-template <typename T> struct maximum;
-template <typename T> struct logical_or;
-template <typename T> struct logical_and;
-template <typename T> struct bit_or;
-template <typename T> struct bit_and;
-template <typename T> struct bit_xor;
-
-namespace detail
-{
-
-
-// some metafunctions which check for the nested types of the adaptable functions
-
-__THRUST_DEFINE_HAS_NESTED_TYPE(has_result_type, result_type)
-
-__THRUST_DEFINE_HAS_NESTED_TYPE(has_argument_type, argument_type)
-
-__THRUST_DEFINE_HAS_NESTED_TYPE(has_first_argument_type, first_argument_type)
-
-__THRUST_DEFINE_HAS_NESTED_TYPE(has_second_argument_type, second_argument_type)
-
-
-template<typename AdaptableBinaryFunction>
-  struct result_type
-{
-  typedef typename AdaptableBinaryFunction::result_type type;
-};
-
-
-template<typename T>
-  struct is_adaptable_unary_function
-    : thrust::detail::and_<
-        has_result_type<T>,
-        has_argument_type<T>
-      >
-{};
-
-
-template<typename T>
-  struct is_adaptable_binary_function
-    : thrust::detail::and_<
-        has_result_type<T>,
-        thrust::detail::and_<
-          has_first_argument_type<T>,
-          has_second_argument_type<T>
-        >
-      >
-{};
-
-
-template<typename BinaryFunction>
-  struct is_commutative
-    : public thrust::detail::false_type
-{};
-
-template<typename T> struct is_commutative< typename thrust::plus<T>        > : public thrust::detail::is_arithmetic<T> {};
-template<typename T> struct is_commutative< typename thrust::multiplies<T>  > : public thrust::detail::is_arithmetic<T> {};
-template<typename T> struct is_commutative< typename thrust::minimum<T>     > : public thrust::detail::is_arithmetic<T> {};
-template<typename T> struct is_commutative< typename thrust::maximum<T>     > : public thrust::detail::is_arithmetic<T> {};
-template<typename T> struct is_commutative< typename thrust::logical_or<T>  > : public thrust::detail::is_arithmetic<T> {};
-template<typename T> struct is_commutative< typename thrust::logical_and<T> > : public thrust::detail::is_arithmetic<T> {};
-template<typename T> struct is_commutative< typename thrust::bit_or<T>      > : public thrust::detail::is_arithmetic<T> {};
-template<typename T> struct is_commutative< typename thrust::bit_and<T>     > : public thrust::detail::is_arithmetic<T> {};
-template<typename T> struct is_commutative< typename thrust::bit_xor<T>     > : public thrust::detail::is_arithmetic<T> {};
-
-} // end namespace detail
-} // end namespace thrust
-
diff --git a/compat/thrust/detail/type_traits/has_member_function.h b/compat/thrust/detail/type_traits/has_member_function.h
deleted file mode 100644
index 117f4cb9bf..0000000000
--- a/compat/thrust/detail/type_traits/has_member_function.h
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/type_traits.h>
-
-#define __THRUST_DEFINE_HAS_MEMBER_FUNCTION(trait_name, member_function_name)                                \
-template<typename T, typename Signature> class trait_name;                                                   \
-                                                                                                             \
-template<typename T, typename Result>                                                                        \
-class trait_name<T, Result(void)>                                                                            \
-{                                                                                                            \
-   class yes { char m; };                                                                                    \
-   class no { yes m[2]; };                                                                                   \
-   struct base_mixin                                                                                         \
-   {                                                                                                         \
-     Result member_function_name();                                                                          \
-   };                                                                                                        \
-   struct base : public T, public base_mixin {};                                                             \
-   template <typename U, U t>  class helper{};                                                               \
-   template <typename U>                                                                                     \
-   static no deduce(U*, helper<Result (base_mixin::*)(), &U::member_function_name>* = 0);                    \
-   static yes deduce(...);                                                                                   \
-public:                                                                                                      \
-   static const bool value = sizeof(yes) == sizeof(deduce(static_cast<base*>(0)));                           \
-   typedef thrust::detail::integral_constant<bool,value> type;                                               \
-};                                                                                                           \
-                                                                                                             \
-template<typename T, typename Result, typename Arg>                                                          \
-class trait_name<T, Result(Arg)>                                                                             \
-{                                                                                                            \
-   class yes { char m; };                                                                                    \
-   class no { yes m[2]; };                                                                                   \
-   struct base_mixin                                                                                         \
-   {                                                                                                         \
-     Result member_function_name(Arg);                                                                       \
-   };                                                                                                        \
-   struct base : public T, public base_mixin {};                                                             \
-   template <typename U, U t>  class helper{};                                                               \
-   template <typename U>                                                                                     \
-   static no deduce(U*, helper<Result (base_mixin::*)(Arg), &U::member_function_name>* = 0);                 \
-   static yes deduce(...);                                                                                   \
-public:                                                                                                      \
-   static const bool value = sizeof(yes) == sizeof(deduce(static_cast<base*>(0)));                           \
-   typedef thrust::detail::integral_constant<bool,value> type;                                               \
-};                                                                                                           \
-                                                                                                             \
-template<typename T, typename Result, typename Arg1, typename Arg2>                                          \
-class trait_name<T, Result(Arg1,Arg2)>                                                                       \
-{                                                                                                            \
-   class yes { char m; };                                                                                    \
-   class no { yes m[2]; };                                                                                   \
-   struct base_mixin                                                                                         \
-   {                                                                                                         \
-     Result member_function_name(Arg1,Arg2);                                                                 \
-   };                                                                                                        \
-   struct base : public T, public base_mixin {};                                                             \
-   template <typename U, U t>  class helper{};                                                               \
-   template <typename U>                                                                                     \
-   static no deduce(U*, helper<Result (base_mixin::*)(Arg1,Arg2), &U::member_function_name>* = 0);           \
-   static yes deduce(...);                                                                                   \
-public:                                                                                                      \
-   static const bool value = sizeof(yes) == sizeof(deduce(static_cast<base*>(0)));                           \
-   typedef thrust::detail::integral_constant<bool,value> type;                                               \
-};                                                                                                           \
-                                                                                                             \
-template<typename T, typename Result, typename Arg1, typename Arg2, typename Arg3>                           \
-class trait_name<T, Result(Arg1,Arg2,Arg3)>                                                                  \
-{                                                                                                            \
-   class yes { char m; };                                                                                    \
-   class no { yes m[2]; };                                                                                   \
-   struct base_mixin                                                                                         \
-   {                                                                                                         \
-     Result member_function_name(Arg1,Arg2,Arg3);                                                            \
-   };                                                                                                        \
-   struct base : public T, public base_mixin {};                                                             \
-   template <typename U, U t>  class helper{};                                                               \
-   template <typename U>                                                                                     \
-   static no deduce(U*, helper<Result (base_mixin::*)(Arg1,Arg2,Arg3), &U::member_function_name>* = 0);      \
-   static yes deduce(...);                                                                                   \
-public:                                                                                                      \
-   static const bool value = sizeof(yes) == sizeof(deduce(static_cast<base*>(0)));                           \
-   typedef thrust::detail::integral_constant<bool,value> type;                                               \
-};                                                                                                           \
-                                                                                                             \
-template<typename T, typename Result, typename Arg1, typename Arg2, typename Arg3, typename Arg4>            \
-class trait_name<T, Result(Arg1,Arg2,Arg3,Arg4)>                                                             \
-{                                                                                                            \
-   class yes { char m; };                                                                                    \
-   class no { yes m[2]; };                                                                                   \
-   struct base_mixin                                                                                         \
-   {                                                                                                         \
-     Result member_function_name(Arg1,Arg2,Arg3,Arg4);                                                       \
-   };                                                                                                        \
-   struct base : public T, public base_mixin {};                                                             \
-   template <typename U, U t>  class helper{};                                                               \
-   template <typename U>                                                                                     \
-   static no deduce(U*, helper<Result (base_mixin::*)(Arg1,Arg2,Arg3,Arg4), &U::member_function_name>* = 0); \
-   static yes deduce(...);                                                                                   \
-public:                                                                                                      \
-   static const bool value = sizeof(yes) == sizeof(deduce(static_cast<base*>(0)));                           \
-   typedef thrust::detail::integral_constant<bool,value> type;                                               \
-};                                                                                                           
-
diff --git a/compat/thrust/detail/type_traits/has_nested_type.h b/compat/thrust/detail/type_traits/has_nested_type.h
deleted file mode 100644
index 98c9460500..0000000000
--- a/compat/thrust/detail/type_traits/has_nested_type.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/type_traits.h>
-
-#define __THRUST_DEFINE_HAS_NESTED_TYPE(trait_name, nested_type_name) \
-template<typename T> \
-  struct trait_name  \
-{                    \
-  typedef char yes_type; \
-  typedef int  no_type;  \
-  template<typename S> static yes_type test(typename S::nested_type_name *); \
-  template<typename S> static no_type  test(...); \
-  static bool const value = sizeof(test<T>(0)) == sizeof(yes_type);\
-  typedef thrust::detail::integral_constant<bool, value> type;\
-};
-
diff --git a/compat/thrust/detail/type_traits/has_trivial_assign.h b/compat/thrust/detail/type_traits/has_trivial_assign.h
deleted file mode 100644
index d248245e84..0000000000
--- a/compat/thrust/detail/type_traits/has_trivial_assign.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file type_traits.h
- *  \brief Temporarily define some type traits
- *         until nvcc can compile tr1::type_traits.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/type_traits.h>
-
-namespace thrust
-{
-
-namespace detail
-{
-
-template<typename T> struct has_trivial_assign
-  : public integral_constant<
-      bool,
-      (is_pod<T>::value && !is_const<T>::value)
-#if THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC
-      || __has_trivial_assign(T)
-#elif THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_GCC
-// only use the intrinsic for >= 4.3
-#if (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 3)
-      || __has_trivial_assign(T)
-#endif // GCC VERSION
-#endif // THRUST_HOST_COMPILER
-    >
-{};
-
-} // end detail
-
-} // end thrust
-
diff --git a/compat/thrust/detail/type_traits/is_call_possible.h b/compat/thrust/detail/type_traits/is_call_possible.h
deleted file mode 100644
index 41b9539e19..0000000000
--- a/compat/thrust/detail/type_traits/is_call_possible.h
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/type_traits.h>
-#include <thrust/detail/type_traits/has_member_function.h>
-
-// inspired by Roman Perepelitsa's presentation from comp.lang.c++.moderated
-// based on the implementation here: http://www.rsdn.ru/forum/cpp/2759773.1.aspx
-
-namespace thrust
-{
-namespace detail
-{
-namespace is_call_possible_detail
-{
-
-template<typename T> class void_exp_result {}; 
-
-template<typename T, typename U> 
-U const& operator,(U const&, void_exp_result<T>); 
-
-template<typename T, typename U> 
-U& operator,(U&, void_exp_result<T>); 
-
-template<typename src_type, typename dest_type> 
-struct clone_constness 
-{
-  typedef dest_type type; 
-}; 
-
-template<typename src_type, typename dest_type> 
-struct clone_constness<const src_type, dest_type> 
-{ 
-  typedef const dest_type type; 
-};
-
-} // end is_call_possible_detail
-} // end detail
-} // end thrust
-
-#define __THRUST_DEFINE_IS_CALL_POSSIBLE(trait_name, member_function_name)                                                                \
-__THRUST_DEFINE_HAS_MEMBER_FUNCTION(trait_name##_has_member, member_function_name)                                                        \
-                                                                                                                                          \
-template <typename T, typename Signature>                                                                                                 \
-struct trait_name                                                                                                                         \
-{                                                                                                                                         \
-  private:                                                                                                                                \
-    struct yes {};                                                                                                                        \
-    struct no { yes m[2]; };                                                                                                              \
-    struct derived : public T                                                                                                             \
-    {                                                                                                                                     \
-      using T::member_function_name;                                                                                                      \
-      no member_function_name(...) const;                                                                                                 \
-    };                                                                                                                                    \
-                                                                                                                                          \
-    typedef typename thrust::detail::is_call_possible_detail::clone_constness<T, derived>::type derived_type;                             \
-                                                                                                                                          \
-    template<typename U, typename Result>                                                                                                 \
-    struct return_value_check                                                                                                             \
-    {                                                                                                                                     \
-      static yes deduce(Result);                                                                                                          \
-      static no deduce(...);                                                                                                              \
-      static no deduce(no);                                                                                                               \
-      static no deduce(thrust::detail::is_call_possible_detail::void_exp_result<T>);                                                      \
-    };                                                                                                                                    \
-                                                                                                                                          \
-    template<typename U>                                                                                                                  \
-    struct return_value_check<U, void>                                                                                                    \
-    {                                                                                                                                     \
-      static yes deduce(...);                                                                                                             \
-      static no deduce(no);                                                                                                               \
-    };                                                                                                                                    \
-                                                                                                                                          \
-    template<bool has_the_member_of_interest, typename F>                                                                                 \
-    struct impl                                                                                                                           \
-    {                                                                                                                                     \
-      static const bool value = false;                                                                                                    \
-    };                                                                                                                                    \
-                                                                                                                                          \
-    template<typename Result, typename Arg>                                                                                               \
-    struct impl<true, Result(Arg)>                                                                                                        \
-    {                                                                                                                                     \
-      static typename add_reference<derived_type>::type test_me;                                                                          \
-      static typename add_reference<Arg>::type          arg;                                                                              \
-                                                                                                                                          \
-      static const bool value =                                                                                                           \
-        sizeof(                                                                                                                           \
-                return_value_check<T, Result>::deduce(                                                                                    \
-                  (test_me.member_function_name(arg), thrust::detail::is_call_possible_detail::void_exp_result<T>())                      \
-                )                                                                                                                         \
-              ) == sizeof(yes);                                                                                                           \
-    };                                                                                                                                    \
-                                                                                                                                          \
-    template<typename Result, typename Arg1, typename Arg2>                                                                               \
-    struct impl<true, Result(Arg1,Arg2)>                                                                                                  \
-    {                                                                                                                                     \
-      static typename add_reference<derived_type>::type test_me;                                                                          \
-      static typename add_reference<Arg1>::type         arg1;                                                                             \
-      static typename add_reference<Arg2>::type         arg2;                                                                             \
-                                                                                                                                          \
-      static const bool value =                                                                                                           \
-        sizeof(                                                                                                                           \
-                return_value_check<T, Result>::deduce(                                                                                    \
-                  (test_me.member_function_name(arg1,arg2), thrust::detail::is_call_possible_detail::void_exp_result<T>())                \
-                )                                                                                                                         \
-              ) == sizeof(yes);                                                                                                           \
-    };                                                                                                                                    \
-                                                                                                                                          \
-    template<typename Result, typename Arg1, typename Arg2, typename Arg3>                                                                \
-    struct impl<true, Result(Arg1,Arg2,Arg3)>                                                                                             \
-    {                                                                                                                                     \
-      static typename add_reference<derived_type>::type test_me;                                                                          \
-      static typename add_reference<Arg1>::type         arg1;                                                                             \
-      static typename add_reference<Arg2>::type         arg2;                                                                             \
-      static typename add_reference<Arg3>::type         arg3;                                                                             \
-                                                                                                                                          \
-      static const bool value =                                                                                                           \
-        sizeof(                                                                                                                           \
-                return_value_check<T, Result>::deduce(                                                                                    \
-                  (test_me.member_function_name(arg1,arg2,arg3), thrust::detail::is_call_possible_detail::void_exp_result<T>())           \
-                )                                                                                                                         \
-              ) == sizeof(yes);                                                                                                           \
-    };                                                                                                                                    \
-                                                                                                                                          \
-    template<typename Result, typename Arg1, typename Arg2, typename Arg3, typename Arg4>                                                 \
-    struct impl<true, Result(Arg1,Arg2,Arg3,Arg4)>                                                                                        \
-    {                                                                                                                                     \
-      static typename add_reference<derived_type>::type test_me;                                                                          \
-      static typename add_reference<Arg1>::type         arg1;                                                                             \
-      static typename add_reference<Arg2>::type         arg2;                                                                             \
-      static typename add_reference<Arg3>::type         arg3;                                                                             \
-      static typename add_reference<Arg4>::type         arg4;                                                                             \
-                                                                                                                                          \
-      static const bool value =                                                                                                           \
-        sizeof(                                                                                                                           \
-                return_value_check<T, Result>::deduce(                                                                                    \
-                  (test_me.member_function_name(arg1,arg2,arg3,arg4), thrust::detail::is_call_possible_detail::void_exp_result<T>())      \
-                )                                                                                                                         \
-              ) == sizeof(yes);                                                                                                           \
-    };                                                                                                                                    \
-                                                                                                                                          \
-  public:                                                                                                                                 \
-    static const bool value = impl<trait_name##_has_member<T,Signature>::value, Signature>::value;                                        \
-    typedef thrust::detail::integral_constant<bool,value> type;                                                                           \
-}; 
-
diff --git a/compat/thrust/detail/type_traits/is_metafunction_defined.h b/compat/thrust/detail/type_traits/is_metafunction_defined.h
deleted file mode 100644
index fba0811fb3..0000000000
--- a/compat/thrust/detail/type_traits/is_metafunction_defined.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/type_traits/has_nested_type.h>
-#include <thrust/detail/type_traits.h>
-
-namespace thrust
-{
-
-namespace detail
-{
-
-__THRUST_DEFINE_HAS_NESTED_TYPE(is_metafunction_defined, type)
-
-template<typename Metafunction>
-  struct enable_if_defined
-    : thrust::detail::lazy_enable_if<
-        is_metafunction_defined<Metafunction>::value,
-        Metafunction
-      >
-{};
-
-} // end detail
-
-} // end thrust
-
diff --git a/compat/thrust/detail/type_traits/iterator/is_discard_iterator.h b/compat/thrust/detail/type_traits/iterator/is_discard_iterator.h
deleted file mode 100644
index cca59da045..0000000000
--- a/compat/thrust/detail/type_traits/iterator/is_discard_iterator.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/iterator/discard_iterator.h>
-
-namespace thrust
-{
-namespace detail
-{
-
-template <typename Iterator>
-struct is_discard_iterator
-  : public thrust::detail::false_type
-{};
-
-template <typename System>
-struct is_discard_iterator< thrust::discard_iterator<System> >
- : public thrust::detail::true_type
-{};
-
-} // end namespace detail
-} // end namespace thrust
-
diff --git a/compat/thrust/detail/type_traits/iterator/is_output_iterator.h b/compat/thrust/detail/type_traits/iterator/is_output_iterator.h
deleted file mode 100644
index 4cefe6353a..0000000000
--- a/compat/thrust/detail/type_traits/iterator/is_output_iterator.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/detail/type_traits/is_metafunction_defined.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/iterator/detail/any_assign.h>
-
-namespace thrust
-{
-
-namespace detail
-{
-
-
-template<typename T>
-  struct is_void_like
-    : thrust::detail::or_<
-        thrust::detail::is_void<T>,
-        thrust::detail::is_same<T,thrust::detail::any_assign>
-      >
-{}; // end is_void_like
-
-
-template<typename T>
-  struct lazy_is_void_like
-    : is_void_like<typename T::type>
-{}; // end lazy_is_void_like
-
-
-// XXX this meta function should first check that T is actually an iterator
-//
-//     if thrust::iterator_value<T> is defined and thrust::iterator_value<T>::type == void
-//       return false
-//     else
-//       return true
-template<typename T>
-  struct is_output_iterator
-    : eval_if<
-        is_metafunction_defined<thrust::iterator_value<T> >::value,
-        lazy_is_void_like<thrust::iterator_value<T> >,
-        thrust::detail::true_type
-      >::type
-{
-}; // end is_output_iterator
-
-} // end detail
-
-} // end thrust
-
diff --git a/compat/thrust/detail/type_traits/minimum_type.h b/compat/thrust/detail/type_traits/minimum_type.h
deleted file mode 100644
index aaa011ec8d..0000000000
--- a/compat/thrust/detail/type_traits/minimum_type.h
+++ /dev/null
@@ -1,162 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/type_traits.h>
-
-namespace thrust
-{
-
-namespace detail
-{ 
-
-namespace minimum_type_detail
-{
-
-//
-// Returns the minimum type or is empty
-// if T1 and T2 are unrelated.
-//
-template <typename T1, typename T2, bool GreaterEqual, bool LessEqual> struct minimum_type_impl {};
-  
-template <typename T1, typename T2>
-struct minimum_type_impl<T1,T2,true,false>
-{
-  typedef T2 type;
-}; // end minimum_type_impl
-
-template <typename T1, typename T2>
-struct minimum_type_impl<T1,T2,false,true>
-{
-  typedef T1 type;
-}; // end minimum_type_impl
-
-template <typename T1, typename T2>
-struct minimum_type_impl<T1,T2,true,true>
-{
-  typedef T1 type;
-}; // end minimum_type_impl
-
-template <typename T1, typename T2>
-struct primitive_minimum_type
-  : minimum_type_detail::minimum_type_impl<
-      T1,
-      T2,
-      ::thrust::detail::is_convertible<T1,T2>::value,
-      ::thrust::detail::is_convertible<T2,T1>::value
-    >
-{
-}; // end primitive_minimum_type
-
-// because some types are not convertible (even to themselves)
-// specialize primitive_minimum_type for when both types are identical
-template <typename T>
-struct primitive_minimum_type<T,T>
-{
-  typedef T type;
-}; // end primitive_minimum_type
-
-// XXX this belongs somewhere more general
-struct any_conversion
-{
-  template<typename T> operator T (void);
-};
-
-} // end minimum_type_detail
-
-template<typename T1,
-         typename T2  = minimum_type_detail::any_conversion,
-         typename T3  = minimum_type_detail::any_conversion,
-         typename T4  = minimum_type_detail::any_conversion,
-         typename T5  = minimum_type_detail::any_conversion,
-         typename T6  = minimum_type_detail::any_conversion,
-         typename T7  = minimum_type_detail::any_conversion,
-         typename T8  = minimum_type_detail::any_conversion,
-         typename T9  = minimum_type_detail::any_conversion,
-         typename T10 = minimum_type_detail::any_conversion,
-         typename T11 = minimum_type_detail::any_conversion,
-         typename T12 = minimum_type_detail::any_conversion,
-         typename T13 = minimum_type_detail::any_conversion,
-         typename T14 = minimum_type_detail::any_conversion,
-         typename T15 = minimum_type_detail::any_conversion,
-         typename T16 = minimum_type_detail::any_conversion>
-  struct minimum_type;
-
-// base case
-template<typename T1, typename T2>
-  struct minimum_type<T1,T2>
-    : minimum_type_detail::primitive_minimum_type<T1,T2>
-{};
-
-template<typename T1, typename T2>
-  struct lazy_minimum_type
-    : minimum_type<
-        typename T1::type,
-        typename T2::type
-      >
-{};
-
-// carefully avoid referring to a nested ::type which may not exist
-template<typename T1,  typename T2,  typename T3,  typename T4,
-         typename T5,  typename T6,  typename T7,  typename T8,
-         typename T9,  typename T10, typename T11, typename T12,
-         typename T13, typename T14, typename T15, typename T16>
-  struct minimum_type
-    : lazy_minimum_type<
-        lazy_minimum_type<
-          lazy_minimum_type<
-            minimum_type<
-              T1,T2
-            >,
-            minimum_type<
-              T3,T4
-            >
-          >,
-          lazy_minimum_type<
-            minimum_type<
-              T5,T6
-            >,
-            minimum_type<
-              T7,T8
-            >
-          >
-        >,
-        lazy_minimum_type<
-          lazy_minimum_type<
-            minimum_type<
-              T9,T10
-            >,
-            minimum_type<
-              T11,T12
-            >
-          >,
-          lazy_minimum_type<
-            minimum_type<
-              T13,T14
-            >,
-            minimum_type<
-              T15,T16
-            >
-          >
-        >
-      >
-{};
-
-} // end detail
-
-} // end thrust
-
diff --git a/compat/thrust/detail/type_traits/pointer_traits.h b/compat/thrust/detail/type_traits/pointer_traits.h
deleted file mode 100644
index a0b5dc625c..0000000000
--- a/compat/thrust/detail/type_traits/pointer_traits.h
+++ /dev/null
@@ -1,276 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/detail/type_traits/is_metafunction_defined.h>
-#include <thrust/detail/type_traits/has_nested_type.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <cstddef>
-
-namespace thrust
-{
-namespace detail
-{
-
-template<typename Ptr> struct pointer_element;
-
-template<template<typename> class Ptr, typename Arg>
-  struct pointer_element<Ptr<Arg> >
-{
-  typedef Arg type;
-};
-
-template<template<typename,typename> class Ptr, typename Arg1, typename Arg2>
-  struct pointer_element<Ptr<Arg1,Arg2> >
-{
-  typedef Arg1 type;
-};
-
-template<template<typename,typename,typename> class Ptr, typename Arg1, typename Arg2, typename Arg3>
-  struct pointer_element<Ptr<Arg1,Arg2,Arg3> >
-{
-  typedef Arg1 type;
-};
-
-template<template<typename,typename,typename,typename> class Ptr, typename Arg1, typename Arg2, typename Arg3, typename Arg4>
-  struct pointer_element<Ptr<Arg1,Arg2,Arg3,Arg4> >
-{
-  typedef Arg1 type;
-};
-
-template<template<typename,typename,typename,typename,typename> class Ptr, typename Arg1, typename Arg2, typename Arg3, typename Arg4, typename Arg5>
-  struct pointer_element<Ptr<Arg1,Arg2,Arg3,Arg4,Arg5> >
-{
-  typedef Arg1 type;
-};
-
-template<typename T>
-  struct pointer_element<T*>
-{
-  typedef T type;
-};
-
-template<typename Ptr>
-  struct pointer_difference
-{
-  typedef typename Ptr::difference_type type;
-};
-
-template<typename T>
-  struct pointer_difference<T*>
-{
-  typedef std::ptrdiff_t type;
-};
-
-template<typename Ptr, typename T> struct rebind_pointer;
-
-template<typename T, typename U>
-  struct rebind_pointer<T*,U>
-{
-  typedef U* type;
-};
-
-template<template<typename> class Ptr, typename Arg, typename T>
-  struct rebind_pointer<Ptr<Arg>,T>
-{
-  typedef Ptr<T> type;
-};
-
-template<template<typename, typename> class Ptr, typename Arg1, typename Arg2, typename T>
-  struct rebind_pointer<Ptr<Arg1,Arg2>,T>
-{
-  typedef Ptr<T,Arg2> type;
-};
-
-template<template<typename, typename, typename> class Ptr, typename Arg1, typename Arg2, typename Arg3, typename T>
-  struct rebind_pointer<Ptr<Arg1,Arg2,Arg3>,T>
-{
-  typedef Ptr<T,Arg2,Arg3> type;
-};
-
-template<template<typename, typename, typename, typename> class Ptr, typename Arg1, typename Arg2, typename Arg3, typename Arg4, typename T>
-  struct rebind_pointer<Ptr<Arg1,Arg2,Arg3,Arg4>,T>
-{
-  typedef Ptr<T,Arg2,Arg3,Arg4> type;
-};
-
-// XXX this should probably be renamed native_type or similar
-__THRUST_DEFINE_HAS_NESTED_TYPE(has_raw_pointer, raw_pointer)
-
-namespace pointer_traits_detail
-{
-
-template<typename Ptr, typename Enable = void> struct pointer_raw_pointer_impl {};
-
-template<typename T>
-  struct pointer_raw_pointer_impl<T*>
-{
-  typedef T* type;
-};
-
-template<typename Ptr>
-  struct pointer_raw_pointer_impl<Ptr, typename enable_if<has_raw_pointer<Ptr>::value>::type>
-{
-  typedef typename Ptr::raw_pointer type;
-};
-
-} // end pointer_traits_detail
-
-template<typename T>
-  struct pointer_raw_pointer
-    : pointer_traits_detail::pointer_raw_pointer_impl<T>
-{};
-
-namespace pointer_traits_detail
-{
-
-template<typename Void>
-  struct capture_address
-{
-  template<typename T>
-  __host__ __device__
-  capture_address(T &r)
-    : m_addr(&r)
-  {}
-
-  inline __host__ __device__
-  Void *operator&() const
-  {
-    return m_addr;
-  }
-
-  Void *m_addr;
-};
-
-// metafunction to compute the type of pointer_to's parameter below
-template<typename T>
-  struct pointer_to_param
-    : thrust::detail::eval_if<
-        thrust::detail::is_void<T>::value,
-        thrust::detail::identity_<capture_address<T> >,
-        thrust::detail::add_reference<T>
-      >
-{};
-
-}
-
-template<typename Ptr>
-  struct pointer_traits
-{
-  typedef Ptr                                    pointer;
-  typedef typename pointer_element<Ptr>::type    element_type;
-  typedef typename pointer_difference<Ptr>::type difference_type;
-
-  template<typename U>
-    struct rebind 
-  {
-    typedef typename rebind_pointer<Ptr,U>::type other;
-  };
-
-  __host__ __device__
-  inline static pointer pointer_to(typename pointer_traits_detail::pointer_to_param<element_type>::type r)
-  {
-    // XXX this is supposed to be pointer::pointer_to(&r); (i.e., call a static member function of pointer called pointer_to)
-    //     assume that pointer has a constructor from raw pointer instead
-    
-    return pointer(&r);
-  }
-
-  // thrust additions follow
-  typedef typename pointer_raw_pointer<Ptr>::type raw_pointer;
-
-  __host__ __device__
-  inline static raw_pointer get(pointer ptr)
-  {
-    return ptr.get();
-  }
-};
-
-template<typename T>
-  struct pointer_traits<T*>
-{
-  typedef T*                                    pointer;
-  typedef T                                     element_type;
-  typedef typename pointer_difference<T*>::type difference_type;
-
-  template<typename U>
-    struct rebind
-  {
-    typedef U* other;
-  };
-
-  __host__ __device__
-  inline static pointer pointer_to(typename pointer_traits_detail::pointer_to_param<element_type>::type r)
-  {
-    return &r;
-  }
-
-  // thrust additions follow
-  typedef typename pointer_raw_pointer<T*>::type raw_pointer;
-
-  __host__ __device__
-  inline static raw_pointer get(pointer ptr)
-  {
-    return ptr;
-  }
-};
-
-template<typename FromPtr, typename ToPtr>
-  struct is_pointer_convertible
-    : thrust::detail::and_<
-        thrust::detail::is_convertible<
-          typename pointer_element<FromPtr>::type *,
-          typename pointer_element<ToPtr>::type *
-        >,
-        thrust::detail::is_convertible<
-          typename iterator_system<FromPtr>::type,
-          typename iterator_system<ToPtr>::type
-        >
-      >
-{};
-
-// this could be a lot better, but for our purposes, it's probably
-// sufficient just to check if pointer_raw_pointer<T> has meaning
-template<typename T>
-  struct is_thrust_pointer
-    : is_metafunction_defined<pointer_raw_pointer<T> >
-{};
-
-// avoid inspecting traits of the arguments if they aren't known to be pointers
-template<typename FromPtr, typename ToPtr>
-  struct lazy_is_pointer_convertible
-    : thrust::detail::eval_if<
-        is_thrust_pointer<FromPtr>::value && is_thrust_pointer<ToPtr>::value,
-        is_pointer_convertible<FromPtr,ToPtr>,
-        thrust::detail::identity_<thrust::detail::false_type>
-      >
-{};
-
-template<typename FromPtr, typename ToPtr, typename T = void>
-  struct enable_if_pointer_is_convertible
-    : thrust::detail::enable_if<
-        lazy_is_pointer_convertible<FromPtr,ToPtr>::type::value,
-        T
-      >
-{};
-
-
-} // end detail
-} // end thrust
-
diff --git a/compat/thrust/detail/type_traits/result_of.h b/compat/thrust/detail/type_traits/result_of.h
deleted file mode 100644
index e30b4fda3b..0000000000
--- a/compat/thrust/detail/type_traits/result_of.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/detail/type_traits/function_traits.h>
-
-namespace thrust
-{
-namespace detail
-{
-
-template<typename Signature, typename Enable = void> struct result_of;
-
-// specialization for unary invocations of things which have result_type
-template<typename Functor, typename Arg1>
-  struct result_of<
-    Functor(Arg1),
-    typename thrust::detail::enable_if<thrust::detail::has_result_type<Functor>::value>::type
-  >
-{
-  typedef typename Functor::result_type type;
-}; // end result_of
-
-// specialization for binary invocations of things which have result_type
-template<typename Functor, typename Arg1, typename Arg2>
-  struct result_of<
-    Functor(Arg1,Arg2),
-    typename thrust::detail::enable_if<thrust::detail::has_result_type<Functor>::value>::type
-  >
-{
-  typedef typename Functor::result_type type;
-};
-
-} // end detail
-} // end thrust
-
diff --git a/compat/thrust/detail/uninitialized_copy.inl b/compat/thrust/detail/uninitialized_copy.inl
deleted file mode 100644
index a01dca53d6..0000000000
--- a/compat/thrust/detail/uninitialized_copy.inl
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file uninitialized_copy.inl
- *  \brief Inline file for uninitialized_copy.h.
- */
-
-#include <thrust/uninitialized_copy.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/system/detail/generic/select_system.h>
-#include <thrust/system/detail/generic/uninitialized_copy.h>
-#include <thrust/system/detail/adl/uninitialized_copy.h>
-
-namespace thrust
-{
-
-
-template<typename DerivedPolicy, typename InputIterator, typename ForwardIterator>
-  ForwardIterator uninitialized_copy(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                     InputIterator first,
-                                     InputIterator last,
-                                     ForwardIterator result)
-{
-  using thrust::system::detail::generic::uninitialized_copy;
-  return uninitialized_copy(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result);
-} // end uninitialized_copy()
-
-
-template<typename DerivedPolicy, typename InputIterator, typename Size, typename ForwardIterator>
-  ForwardIterator uninitialized_copy_n(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                       InputIterator first,
-                                       Size n,
-                                       ForwardIterator result)
-{
-  using thrust::system::detail::generic::uninitialized_copy_n;
-  return uninitialized_copy_n(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, n, result);
-} // end uninitialized_copy_n()
-
-
-template<typename InputIterator,
-         typename ForwardIterator>
-  ForwardIterator uninitialized_copy(InputIterator first,
-                                     InputIterator last,
-                                     ForwardIterator result)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator>::type   System1;
-  typedef typename thrust::iterator_system<ForwardIterator>::type System2;
-
-  System1 system1;
-  System2 system2;
-
-  return thrust::uninitialized_copy(select_system(system1,system2), first, last, result);
-} // end uninitialized_copy()
-
-
-template<typename InputIterator,
-         typename Size,
-         typename ForwardIterator>
-  ForwardIterator uninitialized_copy_n(InputIterator first,
-                                       Size n,
-                                       ForwardIterator result)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator>::type   System1;
-  typedef typename thrust::iterator_system<ForwardIterator>::type System2;
-
-  System1 system1;
-  System2 system2;
-
-  return thrust::uninitialized_copy_n(select_system(system1,system2), first, n, result);
-} // end uninitialized_copy_n()
-
-
-} // end thrust
-
-
diff --git a/compat/thrust/detail/uninitialized_fill.inl b/compat/thrust/detail/uninitialized_fill.inl
deleted file mode 100644
index 3545de56ee..0000000000
--- a/compat/thrust/detail/uninitialized_fill.inl
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file uninitialized_fill.inl
- *  \brief Inline file for uninitialized_fill.h.
- */
-
-#include <thrust/uninitialized_fill.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/system/detail/generic/select_system.h>
-#include <thrust/system/detail/generic/uninitialized_fill.h>
-#include <thrust/system/detail/adl/uninitialized_fill.h>
-
-namespace thrust
-{
-
-
-template<typename DerivedPolicy, typename ForwardIterator, typename T>
-  void uninitialized_fill(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                          ForwardIterator first,
-                          ForwardIterator last,
-                          const T &x)
-{
-  using thrust::system::detail::generic::uninitialized_fill;
-  return uninitialized_fill(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, x);
-} // end uninitialized_fill()
-
-
-template<typename DerivedPolicy, typename ForwardIterator, typename Size, typename T>
-  ForwardIterator uninitialized_fill_n(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                       ForwardIterator first,
-                                       Size n,
-                                       const T &x)
-{
-  using thrust::system::detail::generic::uninitialized_fill_n;
-  return uninitialized_fill_n(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, n, x);
-} // end uninitialized_fill_n()
-
-
-template<typename ForwardIterator,
-         typename T>
-  void uninitialized_fill(ForwardIterator first,
-                          ForwardIterator last,
-                          const T &x)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<ForwardIterator>::type System;
-
-  System system;
-
-  thrust::uninitialized_fill(select_system(system), first, last, x);
-} // end uninitialized_fill()
-
-
-template<typename ForwardIterator,
-         typename Size,
-         typename T>
-  ForwardIterator uninitialized_fill_n(ForwardIterator first,
-                                       Size n,
-                                       const T &x)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<ForwardIterator>::type System;
-
-  System system;
-
-  return thrust::uninitialized_fill_n(select_system(system), first, n, x);
-} // end uninitialized_fill_n()
-
-
-} // end thrust
-
diff --git a/compat/thrust/detail/unique.inl b/compat/thrust/detail/unique.inl
deleted file mode 100644
index e90187d919..0000000000
--- a/compat/thrust/detail/unique.inl
+++ /dev/null
@@ -1,320 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file unique.inl
- *  \brief Inline file for unique.h.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/unique.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/system/detail/generic/select_system.h>
-#include <thrust/system/detail/generic/unique.h>
-#include <thrust/system/detail/generic/unique_by_key.h>
-#include <thrust/system/detail/adl/unique.h>
-#include <thrust/system/detail/adl/unique_by_key.h>
-
-namespace thrust
-{
-
-
-template <typename DerivedPolicy,
-          typename ForwardIterator>
-ForwardIterator unique(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                       ForwardIterator first,
-                       ForwardIterator last)
-{
-  using thrust::system::detail::generic::unique;
-  return unique(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last);
-} // end unique()
-
-
-template <typename DerivedPolicy,
-          typename ForwardIterator,
-          typename BinaryPredicate>
-ForwardIterator unique(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                       ForwardIterator first,
-                       ForwardIterator last,
-                       BinaryPredicate binary_pred)
-{
-  using thrust::system::detail::generic::unique;
-  return unique(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, binary_pred);
-} // end unique()
-
-
-template <typename DerivedPolicy,
-          typename InputIterator,
-          typename OutputIterator>
-OutputIterator unique_copy(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                           InputIterator first,
-                           InputIterator last,
-                           OutputIterator output)
-{
-  using thrust::system::detail::generic::unique_copy;
-  return unique_copy(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, output);
-} // end unique_copy()
-
-
-template <typename DerivedPolicy,
-          typename InputIterator,
-          typename OutputIterator,
-          typename BinaryPredicate>
-OutputIterator unique_copy(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                           InputIterator first,
-                           InputIterator last,
-                           OutputIterator output,
-                           BinaryPredicate binary_pred)
-{
-  using thrust::system::detail::generic::unique_copy;
-  return unique_copy(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, output, binary_pred);
-} // end unique_copy()
-
-
-template <typename DerivedPolicy,
-          typename ForwardIterator1,
-          typename ForwardIterator2>
-  thrust::pair<ForwardIterator1,ForwardIterator2>
-  unique_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                ForwardIterator1 keys_first, 
-                ForwardIterator1 keys_last,
-                ForwardIterator2 values_first)
-{
-  using thrust::system::detail::generic::unique_by_key;
-  return unique_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first, keys_last, values_first);
-} // end unique_by_key()
-
-
-template <typename DerivedPolicy,
-          typename ForwardIterator1,
-          typename ForwardIterator2,
-          typename BinaryPredicate>
-  thrust::pair<ForwardIterator1,ForwardIterator2>
-  unique_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                ForwardIterator1 keys_first, 
-                ForwardIterator1 keys_last,
-                ForwardIterator2 values_first,
-                BinaryPredicate binary_pred)
-{
-  using thrust::system::detail::generic::unique_by_key;
-  return unique_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first, keys_last, values_first, binary_pred);
-} // end unique_by_key()
-
-
-template <typename DerivedPolicy,
-          typename InputIterator1,
-          typename InputIterator2,
-          typename OutputIterator1,
-          typename OutputIterator2>
-  thrust::pair<OutputIterator1,OutputIterator2>
-  unique_by_key_copy(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                     InputIterator1 keys_first, 
-                     InputIterator1 keys_last,
-                     InputIterator2 values_first,
-                     OutputIterator1 keys_output,
-                     OutputIterator2 values_output)
-{
-  using thrust::system::detail::generic::unique_by_key_copy;
-  return unique_by_key_copy(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first, keys_last, values_first, keys_output, values_output);
-} // end unique_by_key_copy()
-
-
-template <typename DerivedPolicy,
-          typename InputIterator1,
-          typename InputIterator2,
-          typename OutputIterator1,
-          typename OutputIterator2,
-          typename BinaryPredicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-  unique_by_key_copy(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                     InputIterator1 keys_first, 
-                     InputIterator1 keys_last,
-                     InputIterator2 values_first,
-                     OutputIterator1 keys_output,
-                     OutputIterator2 values_output,
-                     BinaryPredicate binary_pred)
-{
-  using thrust::system::detail::generic::unique_by_key_copy;
-  return unique_by_key_copy(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first, keys_last, values_first, keys_output, values_output, binary_pred);
-} // end unique_by_key_copy()
-
-
-template<typename ForwardIterator>
-  ForwardIterator unique(ForwardIterator first,
-                         ForwardIterator last)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<ForwardIterator>::type System;
-
-  System system;
-
-  return thrust::unique(select_system(system), first, last);
-} // end unique()
-
-
-template<typename ForwardIterator,
-         typename BinaryPredicate>
-  ForwardIterator unique(ForwardIterator first,
-                         ForwardIterator last,
-                         BinaryPredicate binary_pred)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<ForwardIterator>::type System;
-
-  System system;
-
-  return thrust::unique(select_system(system), first, last, binary_pred);
-} // end unique()
-
-
-template<typename InputIterator,
-         typename OutputIterator>
-  OutputIterator unique_copy(InputIterator first,
-                             InputIterator last,
-                             OutputIterator output)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator>::type  System1;
-  typedef typename thrust::iterator_system<OutputIterator>::type System2;
-
-  System1 system1;
-  System2 system2;
-
-  return thrust::unique_copy(select_system(system1,system2), first, last, output);
-} // end unique_copy()
-
-
-template<typename InputIterator,
-         typename OutputIterator,
-         typename BinaryPredicate>
-  OutputIterator unique_copy(InputIterator first,
-                             InputIterator last,
-                             OutputIterator output,
-                             BinaryPredicate binary_pred)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator>::type  System1;
-  typedef typename thrust::iterator_system<OutputIterator>::type System2;
-
-  System1 system1;
-  System2 system2;
-
-  return thrust::unique_copy(select_system(system1,system2), first, last, output, binary_pred);
-} // end unique_copy()
-
-
-template<typename ForwardIterator1,
-         typename ForwardIterator2>
-  thrust::pair<ForwardIterator1,ForwardIterator2>
-    unique_by_key(ForwardIterator1 keys_first, 
-                  ForwardIterator1 keys_last,
-                  ForwardIterator2 values_first)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<ForwardIterator1>::type System1;
-  typedef typename thrust::iterator_system<ForwardIterator2>::type System2;
-
-  System1 system1;
-  System2 system2;
-
-  return thrust::unique_by_key(select_system(system1,system2), keys_first, keys_last, values_first);
-} // end unique_by_key()
-
-
-template<typename ForwardIterator1,
-         typename ForwardIterator2,
-         typename BinaryPredicate>
-  thrust::pair<ForwardIterator1,ForwardIterator2>
-    unique_by_key(ForwardIterator1 keys_first, 
-                  ForwardIterator1 keys_last,
-                  ForwardIterator2 values_first,
-                  BinaryPredicate binary_pred)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<ForwardIterator1>::type System1;
-  typedef typename thrust::iterator_system<ForwardIterator2>::type System2;
-
-  System1 system1;
-  System2 system2;
-
-  return thrust::unique_by_key(select_system(system1,system2), keys_first, keys_last, values_first, binary_pred);
-} // end unique_by_key()
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator1,
-         typename OutputIterator2>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    unique_by_key_copy(InputIterator1 keys_first, 
-                       InputIterator1 keys_last,
-                       InputIterator2 values_first,
-                       OutputIterator1 keys_output,
-                       OutputIterator2 values_output)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type  System1;
-  typedef typename thrust::iterator_system<InputIterator2>::type  System2;
-  typedef typename thrust::iterator_system<OutputIterator1>::type System3;
-  typedef typename thrust::iterator_system<OutputIterator2>::type System4;
-
-  System1 system1;
-  System2 system2;
-  System3 system3;
-  System4 system4;
-
-  return thrust::unique_by_key_copy(select_system(system1,system2,system3,system4), keys_first, keys_last, values_first, keys_output, values_output);
-} // end unique_by_key_copy()
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename BinaryPredicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    unique_by_key_copy(InputIterator1 keys_first, 
-                       InputIterator1 keys_last,
-                       InputIterator2 values_first,
-                       OutputIterator1 keys_output,
-                       OutputIterator2 values_output,
-                       BinaryPredicate binary_pred)
-{
-  using thrust::system::detail::generic::select_system;
-
-  typedef typename thrust::iterator_system<InputIterator1>::type  System1;
-  typedef typename thrust::iterator_system<InputIterator2>::type  System2;
-  typedef typename thrust::iterator_system<OutputIterator1>::type System3;
-  typedef typename thrust::iterator_system<OutputIterator2>::type System4;
-
-  System1 system1;
-  System2 system2;
-  System3 system3;
-  System4 system4;
-
-  return thrust::unique_by_key_copy(select_system(system1,system2,system3,system4), keys_first, keys_last, values_first, keys_output, values_output, binary_pred);
-} // end unique_by_key_copy()
-
-
-} // end namespace thrust
-
diff --git a/compat/thrust/detail/use_default.h b/compat/thrust/detail/use_default.h
deleted file mode 100644
index c6eb66ef05..0000000000
--- a/compat/thrust/detail/use_default.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-namespace thrust
-{
-
-struct use_default {};
-
-} // end thrust
-
diff --git a/compat/thrust/detail/util/align.h b/compat/thrust/detail/util/align.h
deleted file mode 100644
index 10f107a95b..0000000000
--- a/compat/thrust/detail/util/align.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#pragma once
-
-#include <thrust/detail/cstdint.h>
-
-// functions to handle memory alignment
-
-namespace thrust
-{
-namespace detail
-{
-namespace util
-{
-
-template <typename T>
-T * align_up(T * ptr, detail::uintptr_t bytes)
-{
-    return (T *) ( bytes * (((detail::uintptr_t) ptr + (bytes - 1)) / bytes) );
-}
-
-template <typename T>
-T * align_down(T * ptr, detail::uintptr_t bytes)
-{
-    return (T *) ( bytes * (detail::uintptr_t(ptr) / bytes) );
-}
-
-template <typename T>
-bool is_aligned(T * ptr, detail::uintptr_t bytes = sizeof(T))
-{
-    return detail::uintptr_t(ptr) % bytes == 0;
-}
-
-} // end namespace util
-} // end namespace detail
-} // end namespace thrust
-
diff --git a/compat/thrust/detail/util/blocking.h b/compat/thrust/detail/util/blocking.h
deleted file mode 100644
index 3bb78a637b..0000000000
--- a/compat/thrust/detail/util/blocking.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#pragma once
-
-//functions to support blocking
-
-namespace thrust
-{
-
-namespace detail
-{
-
-namespace util
-{
-
-// x/y rounding towards +infinity for integers, used to determine # of blocks/warps etc.
-template<typename L, typename R>
-  inline __host__ __device__ L divide_ri(const L x, const R y)
-{
-    return (x + (y - 1)) / y;
-}
-
-// x/y rounding towards zero for integers, used to determine # of blocks/warps etc.
-template<typename L, typename R>
-  inline __host__ __device__ L divide_rz(const L x, const R y)
-{
-    return x / y;
-}
-
-// round x towards infinity to the next multiple of y
-template<typename L, typename R>
-  inline __host__ __device__ L round_i(const L x, const R y){ return y * divide_ri(x, y); }
-
-// round x towards zero to the next multiple of y
-template<typename L, typename R>
-  inline __host__ __device__ L round_z(const L x, const R y){ return y * divide_rz(x, y); }
-
-} // end namespace util
-
-} // end namespace detail
-
-} // end namespace thrust
-
diff --git a/compat/thrust/detail/vector_base.h b/compat/thrust/detail/vector_base.h
deleted file mode 100644
index 6974eab554..0000000000
--- a/compat/thrust/detail/vector_base.h
+++ /dev/null
@@ -1,534 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file vector_base.h
- *  \brief Defines the interface to a base class for
- *         host_vector & device_vector.
- */
-
-#pragma once
-
-#include <thrust/iterator/detail/normal_iterator.h>
-#include <thrust/iterator/reverse_iterator.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/detail/contiguous_storage.h>
-#include <vector>
-
-namespace thrust
-{
-
-namespace detail
-{
-
-template<typename T, typename Alloc>
-  class vector_base
-{
-  private:
-    typedef thrust::detail::contiguous_storage<T,Alloc> storage_type;
-
-  public:
-    // typedefs
-    typedef typename storage_type::value_type      value_type;
-    typedef typename storage_type::pointer         pointer;
-    typedef typename storage_type::const_pointer   const_pointer;
-    typedef typename storage_type::reference       reference;
-    typedef typename storage_type::const_reference const_reference;
-    typedef typename storage_type::size_type       size_type;
-    typedef typename storage_type::difference_type difference_type;
-    typedef typename storage_type::allocator_type  allocator_type;
-
-    typedef typename storage_type::iterator        iterator;
-    typedef typename storage_type::const_iterator  const_iterator;
-
-    typedef thrust::reverse_iterator<iterator>       reverse_iterator;
-    typedef thrust::reverse_iterator<const_iterator> const_reverse_iterator;
-
-    /*! This constructor creates an empty vector_base.
-     */
-    vector_base(void);
-
-    /*! This constructor creates a vector_base with default-constructed
-     *  elements.
-     *  \param n The number of elements to create.
-     */
-    explicit vector_base(size_type n);
-
-    /*! This constructor creates a vector_base with copies
-     *  of an exemplar element.
-     *  \param n The number of elements to initially create.
-     *  \param value An element to copy.
-     */
-    explicit vector_base(size_type n, const value_type &value);
-
-    /*! Copy constructor copies from an exemplar vector_base.
-     *  \param v The vector_base to copy.
-     */
-    vector_base(const vector_base &v);
-
-    /*! assign operator makes a copy of an exemplar vector_base.
-     *  \param v The vector_base to copy.
-     */
-    vector_base &operator=(const vector_base &v);
-
-    /*! Copy constructor copies from an exemplar vector_base with different
-     *  type.
-     *  \param v The vector_base to copy.
-     */
-    template<typename OtherT, typename OtherAlloc>
-    vector_base(const vector_base<OtherT, OtherAlloc> &v);
-
-    /*! assign operator makes a copy of an exemplar vector_base with different
-     *  type.
-     *  \param v The vector_base to copy.
-     */
-    template<typename OtherT, typename OtherAlloc>
-    vector_base &operator=(const vector_base<OtherT,OtherAlloc> &v);
-
-    /*! Copy constructor copies from an exemplar std::vector.
-     *  \param v The std::vector to copy.
-     *  XXX TODO: Make this method redundant with a properly templatized constructor.
-     *            We would like to copy from a vector whose element type is anything
-     *            assignable to value_type.
-     */
-    template<typename OtherT, typename OtherAlloc>
-    vector_base(const std::vector<OtherT, OtherAlloc> &v);
-
-    /*! assign operator makes a copy of an exemplar std::vector.
-     *  \param v The vector to copy.
-     *  XXX TODO: Templatize this assign on the type of the vector to copy from.
-     *            We would like to copy from a vector whose element type is anything
-     *            assignable to value_type.
-     */
-    template<typename OtherT, typename OtherAlloc>
-    vector_base &operator=(const std::vector<OtherT,OtherAlloc> &v);
-
-    /*! This constructor builds a vector_base from a range.
-     *  \param first The beginning of the range.
-     *  \param last The end of the range.
-     */
-    template<typename InputIterator>
-    vector_base(InputIterator first, InputIterator last);
-
-    /*! The destructor erases the elements.
-     */
-    ~vector_base(void);
-
-    /*! \brief Resizes this vector_base to the specified number of elements.
-     *  \param new_size Number of elements this vector_base should contain.
-     *  \throw std::length_error If n exceeds max_size9).
-     *
-     *  This method will resize this vector_base to the specified number of
-     *  elements. If the number is smaller than this vector_base's current
-     *  size this vector_base is truncated, otherwise this vector_base is
-     *  extended and new elements are default constructed.
-     */
-    void resize(size_type new_size);
-
-    /*! \brief Resizes this vector_base to the specified number of elements.
-     *  \param new_size Number of elements this vector_base should contain.
-     *  \param x Data with which new elements should be populated.
-     *  \throw std::length_error If n exceeds max_size().
-     *
-     *  This method will resize this vector_base to the specified number of
-     *  elements.  If the number is smaller than this vector_base's current
-     *  size this vector_base is truncated, otherwise this vector_base is
-     *  extended and new elements are populated with given data.
-     */
-    void resize(size_type new_size, const value_type &x);
-
-    /*! Returns the number of elements in this vector_base.
-     */
-    size_type size(void) const;
-
-    /*! Returns the size() of the largest possible vector_base.
-     *  \return The largest possible return value of size().
-     */
-    size_type max_size(void) const;
-
-    /*! \brief If n is less than or equal to capacity(), this call has no effect.
-     *         Otherwise, this method is a request for allocation of additional memory. If
-     *         the request is successful, then capacity() is greater than or equal to
-     *         n; otherwise, capacity() is unchanged. In either case, size() is unchanged.
-     *  \throw std::length_error If n exceeds max_size().
-     */
-    void reserve(size_type n);
-
-    /*! Returns the number of elements which have been reserved in this
-     *  vector_base.
-     */
-    size_type capacity(void) const;
-
-    /*! This method shrinks the capacity of this vector_base to exactly
-     *  fit its elements.
-     */
-    void shrink_to_fit(void);
-
-    /*! \brief Subscript access to the data contained in this vector_dev.
-     *  \param n The index of the element for which data should be accessed.
-     *  \return Read/write reference to data.
-     *
-     *  This operator allows for easy, array-style, data access.
-     *  Note that data access with this operator is unchecked and
-     *  out_of_range lookups are not defined.
-     */
-    reference operator[](size_type n);
-
-    /*! \brief Subscript read access to the data contained in this vector_dev.
-     *  \param n The index of the element for which data should be accessed.
-     *  \return Read reference to data.
-     *
-     *  This operator allows for easy, array-style, data access.
-     *  Note that data access with this operator is unchecked and
-     *  out_of_range lookups are not defined.
-     */
-    const_reference operator[](size_type n) const;
-
-    /*! This method returns an iterator pointing to the beginning of
-     *  this vector_base.
-     *  \return mStart
-     */
-    iterator begin(void);
-
-    /*! This method returns a const_iterator pointing to the beginning
-     *  of this vector_base.
-     *  \return mStart
-     */
-    const_iterator begin(void) const;
-
-    /*! This method returns a const_iterator pointing to the beginning
-     *  of this vector_base.
-     *  \return mStart
-     */
-    const_iterator cbegin(void) const;
-
-    /*! This method returns a reverse_iterator pointing to the beginning of
-     *  this vector_base's reversed sequence.
-     *  \return A reverse_iterator pointing to the beginning of this
-     *          vector_base's reversed sequence.
-     */
-    reverse_iterator rbegin(void);
-
-    /*! This method returns a const_reverse_iterator pointing to the beginning of
-     *  this vector_base's reversed sequence.
-     *  \return A const_reverse_iterator pointing to the beginning of this
-     *          vector_base's reversed sequence.
-     */
-    const_reverse_iterator rbegin(void) const;
-
-    /*! This method returns a const_reverse_iterator pointing to the beginning of
-     *  this vector_base's reversed sequence.
-     *  \return A const_reverse_iterator pointing to the beginning of this
-     *          vector_base's reversed sequence.
-     */
-    const_reverse_iterator crbegin(void) const;
-
-    /*! This method returns an iterator pointing to one element past the
-     *  last of this vector_base.
-     *  \return begin() + size().
-     */
-    iterator end(void);
-
-    /*! This method returns a const_iterator pointing to one element past the
-     *  last of this vector_base.
-     *  \return begin() + size().
-     */
-    const_iterator end(void) const;
-
-    /*! This method returns a const_iterator pointing to one element past the
-     *  last of this vector_base.
-     *  \return begin() + size().
-     */
-    const_iterator cend(void) const;
-
-    /*! This method returns a reverse_iterator pointing to one element past the
-     *  last of this vector_base's reversed sequence.
-     *  \return rbegin() + size().
-     */
-    reverse_iterator rend(void);
-
-    /*! This method returns a const_reverse_iterator pointing to one element past the
-     *  last of this vector_base's reversed sequence.
-     *  \return rbegin() + size().
-     */
-    const_reverse_iterator rend(void) const;
-
-    /*! This method returns a const_reverse_iterator pointing to one element past the
-     *  last of this vector_base's reversed sequence.
-     *  \return rbegin() + size().
-     */
-    const_reverse_iterator crend(void) const;
-
-    /*! This method returns a const_reference referring to the first element of this
-     *  vector_base.
-     *  \return The first element of this vector_base.
-     */
-    const_reference front(void) const;
-
-    /*! This method returns a reference pointing to the first element of this
-     *  vector_base.
-     *  \return The first element of this vector_base.
-     */
-    reference front(void);
-
-    /*! This method returns a const reference pointing to the last element of
-     *  this vector_base.
-     *  \return The last element of this vector_base.
-     */
-    const_reference back(void) const;
-
-    /*! This method returns a reference referring to the last element of
-     *  this vector_dev.
-     *  \return The last element of this vector_base.
-     */
-    reference back(void);
-
-    /*! This method returns a pointer to this vector_base's first element.
-     *  \return A pointer to the first element of this vector_base.
-     */
-    pointer data(void);
-
-    /*! This method returns a const_pointer to this vector_base's first element.
-     *  \return a const_pointer to the first element of this vector_base.
-     */
-    const_pointer data(void) const;
-
-    /*! This method resizes this vector_base to 0.
-     */
-    void clear(void);
-
-    /*! This method returns true iff size() == 0.
-     *  \return true if size() == 0; false, otherwise.
-     */
-    bool empty(void) const;
-
-    /*! This method appends the given element to the end of this vector_base.
-     *  \param x The element to append.
-     */
-    void push_back(const value_type &x);
-
-    /*! This method erases the last element of this vector_base, invalidating
-     *  all iterators and references to it.
-     */
-    void pop_back(void);
-
-    /*! This method swaps the contents of this vector_base with another vector_base.
-     *  \param v The vector_base with which to swap.
-     */
-    void swap(vector_base &v);
-
-    /*! This method removes the element at position pos.
-     *  \param pos The position of the element of interest.
-     *  \return An iterator pointing to the new location of the element that followed the element
-     *          at position pos.
-     */
-    iterator erase(iterator pos);
-
-    /*! This method removes the range of elements [first,last) from this vector_base.
-     *  \param first The beginning of the range of elements to remove.
-     *  \param last The end of the range of elements to remove.
-     *  \return An iterator pointing to the new location of the element that followed the last
-     *          element in the sequence [first,last).
-     */
-    iterator erase(iterator first, iterator last);
-
-    /*! This method inserts a single copy of a given exemplar value at the
-     *  specified position in this vector_base.
-     *  \param position The insertion position.
-     *  \param x The exemplar element to copy & insert.
-     *  \return An iterator pointing to the newly inserted element.
-     */
-    iterator insert(iterator position, const T &x); 
-
-    /*! This method inserts a copy of an exemplar value to a range at the
-     *  specified position in this vector_base.
-     *  \param position The insertion position
-     *  \param n The number of insertions to perform.
-     *  \param x The value to replicate and insert.
-     */
-    void insert(iterator position, size_type n, const T &x);
-
-    /*! This method inserts a copy of an input range at the specified position
-     *  in this vector_base.
-     *  \param position The insertion position.
-     *  \param first The beginning of the range to copy.
-     *  \param last  The end of the range to copy.
-     *
-     *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html>Input Iterator</a>,
-     *                        and \p InputIterator's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/Assignable.html">Assignable</a>.
-     */
-    template<typename InputIterator>
-    void insert(iterator position, InputIterator first, InputIterator last);
-
-    /*! This version of \p assign replicates a given exemplar
-     *  \p n times into this vector_base.
-     *  \param n The number of times to copy \p x.
-     *  \param x The exemplar element to replicate.
-     */
-    void assign(size_type n, const T &x);
-
-    /*! This version of \p assign makes this vector_base a copy of a given input range.
-     *  \param first The beginning of the range to copy.
-     *  \param last  The end of the range to copy.
-     *
-     *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator">Input Iterator</a>.
-     */
-    template<typename InputIterator>
-    void assign(InputIterator first, InputIterator last);
-
-    /*! This method returns a copy of this vector's allocator.
-     *  \return A copy of the alloctor used by this vector.
-     */
-    allocator_type get_allocator(void) const;
-
-  protected:
-    // Our storage
-    storage_type m_storage;
-
-    // The size of this vector_base, in number of elements.
-    size_type m_size;
-
-  private:
-    // these methods resolve the ambiguity of the constructor template of form (Iterator, Iterator)
-    template<typename IteratorOrIntegralType>
-      void init_dispatch(IteratorOrIntegralType begin, IteratorOrIntegralType end, false_type); 
-
-    template<typename IteratorOrIntegralType>
-      void init_dispatch(IteratorOrIntegralType n, IteratorOrIntegralType value, true_type); 
-
-    template<typename InputIterator>
-      void range_init(InputIterator first, InputIterator last);
-
-    template<typename InputIterator>
-      void range_init(InputIterator first, InputIterator last, thrust::incrementable_traversal_tag);
-
-    template<typename ForwardIterator>
-      void range_init(ForwardIterator first, ForwardIterator last, thrust::random_access_traversal_tag);
-
-    void default_init(size_type n);
-
-    void fill_init(size_type n, const T &x);
-
-    // these methods resolve the ambiguity of the insert() template of form (iterator, InputIterator, InputIterator)
-    template<typename InputIteratorOrIntegralType>
-      void insert_dispatch(iterator position, InputIteratorOrIntegralType first, InputIteratorOrIntegralType last, false_type);
-
-    // these methods resolve the ambiguity of the insert() template of form (iterator, InputIterator, InputIterator)
-    template<typename InputIteratorOrIntegralType>
-      void insert_dispatch(iterator position, InputIteratorOrIntegralType n, InputIteratorOrIntegralType x, true_type);
-
-    // this method appends n default-constructed elements at the end
-    void append(size_type n);
-
-    // this method performs insertion from a fill value
-    void fill_insert(iterator position, size_type n, const T &x);
-
-    // this method performs insertion from a range
-    template<typename InputIterator>
-      void copy_insert(iterator position, InputIterator first, InputIterator last);
-
-    // these methods resolve the ambiguity of the assign() template of form (InputIterator, InputIterator)
-    template<typename InputIterator>
-      void assign_dispatch(InputIterator first, InputIterator last, false_type);
-
-    // these methods resolve the ambiguity of the assign() template of form (InputIterator, InputIterator)
-    template<typename Integral>
-      void assign_dispatch(Integral n, Integral x, true_type);
-
-    // this method performs assignment from a range
-    template<typename InputIterator>
-      void range_assign(InputIterator first, InputIterator last);
-
-    // this method performs assignment from a range of RandomAccessIterators
-    template<typename RandomAccessIterator>
-      void range_assign(RandomAccessIterator first, RandomAccessIterator last, thrust::random_access_traversal_tag);
-
-    // this method performs assignment from a range of InputIterators
-    template<typename InputIterator>
-      void range_assign(InputIterator first, InputIterator last, thrust::incrementable_traversal_tag);
-
-    // this method performs assignment from a fill value
-    void fill_assign(size_type n, const T &x);
-
-    // this method allocates new storage and construct copies the given range
-    template<typename ForwardIterator>
-    void allocate_and_copy(size_type requested_size,
-                           ForwardIterator first, ForwardIterator last,
-                           storage_type &new_storage);
-}; // end vector_base
-
-} // end detail
-
-/*! This function assigns the contents of vector a to vector b and the
- *  contents of vector b to vector a.
- *
- *  \param a The first vector of interest. After completion, the contents
- *           of b will be returned here.
- *  \param b The second vector of interest. After completion, the contents
- *           of a will be returned here.
- */
-template<typename T, typename Alloc>
-  void swap(detail::vector_base<T,Alloc> &a,
-            detail::vector_base<T,Alloc> &b);
-
-
-/*! This operator allows comparison between two vectors.
- *  \param lhs The first \p vector to compare.
- *  \param rhs The second \p vector to compare.
- *  \return \c true if and only if each corresponding element in either
- *          \p vector equals the other; \c false, otherwise.
- */
-template<typename T1, typename Alloc1,
-         typename T2, typename Alloc2>
-bool operator==(const detail::vector_base<T1,Alloc1>& lhs,
-                const detail::vector_base<T2,Alloc2>& rhs);
-    
-template<typename T1, typename Alloc1,
-         typename T2, typename Alloc2>
-bool operator==(const detail::vector_base<T1,Alloc1>& lhs,
-                const std::vector<T2,Alloc2>&         rhs);
-
-template<typename T1, typename Alloc1,
-         typename T2, typename Alloc2>
-bool operator==(const std::vector<T1,Alloc1>&         lhs,
-                const detail::vector_base<T2,Alloc2>& rhs);
-
-/*! This operator allows comparison between two vectors.
- *  \param lhs The first \p vector to compare.
- *  \param rhs The second \p vector to compare.
- *  \return \c false if and only if each corresponding element in either
- *          \p vector equals the other; \c true, otherwise.
- */
-template<typename T1, typename Alloc1,
-         typename T2, typename Alloc2>
-bool operator!=(const detail::vector_base<T1,Alloc1>& lhs,
-                const detail::vector_base<T2,Alloc2>& rhs);
-    
-template<typename T1, typename Alloc1,
-         typename T2, typename Alloc2>
-bool operator!=(const detail::vector_base<T1,Alloc1>& lhs,
-                const std::vector<T2,Alloc2>&         rhs);
-
-template<typename T1, typename Alloc1,
-         typename T2, typename Alloc2>
-bool operator!=(const std::vector<T1,Alloc1>&         lhs,
-                const detail::vector_base<T2,Alloc2>& rhs);
-
-} // end thrust
-
-#include <thrust/detail/vector_base.inl>
-
diff --git a/compat/thrust/detail/vector_base.inl b/compat/thrust/detail/vector_base.inl
deleted file mode 100644
index 24e6466c18..0000000000
--- a/compat/thrust/detail/vector_base.inl
+++ /dev/null
@@ -1,1203 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file vector_base.inl
- *  \brief Inline file for vector_base.h.
- */
-
-#include <thrust/detail/vector_base.h>
-#include <thrust/detail/copy.h>
-#include <thrust/detail/overlapped_copy.h>
-#include <thrust/equal.h>
-#include <thrust/distance.h>
-#include <thrust/advance.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/detail/minmax.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/detail/temporary_array.h>
-
-#include <stdexcept>
-
-namespace thrust
-{
-
-namespace detail
-{
-
-template<typename T, typename Alloc>
-  vector_base<T,Alloc>
-    ::vector_base(void)
-      :m_storage(),
-       m_size(0)
-{
-  ;
-} // end vector_base::vector_base()
-
-template<typename T, typename Alloc>
-  vector_base<T,Alloc>
-    ::vector_base(size_type n)
-      :m_storage(),
-       m_size(0)
-{
-  default_init(n);
-} // end vector_base::vector_base()
-
-template<typename T, typename Alloc>
-  vector_base<T,Alloc>
-    ::vector_base(size_type n, const value_type &value)
-      :m_storage(),
-       m_size(0)
-{
-  fill_init(n,value);
-} // end vector_base::vector_base()
-
-template<typename T, typename Alloc>
-  vector_base<T,Alloc>
-    ::vector_base(const vector_base &v)
-      :m_storage(),
-       m_size(0)
-{
-  range_init(v.begin(), v.end());
-} // end vector_base::vector_base()
-
-template<typename T, typename Alloc>
-  vector_base<T,Alloc> &
-    vector_base<T,Alloc>
-      ::operator=(const vector_base &v)
-{
-  if(this != &v)
-  {
-    assign(v.begin(), v.end());
-  } // end if
-
-  return *this;
-} // end vector_base::operator=()
-
-template<typename T, typename Alloc>
-  template<typename OtherT, typename OtherAlloc>
-    vector_base<T,Alloc>
-      ::vector_base(const vector_base<OtherT,OtherAlloc> &v)
-        :m_storage(),
-         m_size(0)
-{
-  range_init(v.begin(), v.end());
-} // end vector_base::vector_base()
-
-template<typename T, typename Alloc>
-  template<typename OtherT, typename OtherAlloc>
-    vector_base<T,Alloc> &
-      vector_base<T,Alloc>
-        ::operator=(const vector_base<OtherT,OtherAlloc> &v)
-{
-  assign(v.begin(), v.end());
-
-  return *this;
-} // end vector_base::operator=()
-
-template<typename T, typename Alloc>
-  template<typename OtherT, typename OtherAlloc>
-    vector_base<T,Alloc>
-      ::vector_base(const std::vector<OtherT,OtherAlloc> &v)
-        :m_storage(),
-         m_size(0)
-{
-  range_init(v.begin(), v.end());
-} // end vector_base::vector_base()
-
-template<typename T, typename Alloc>
-  template<typename OtherT, typename OtherAlloc>
-    vector_base<T,Alloc> &
-      vector_base<T,Alloc>
-        ::operator=(const std::vector<OtherT,OtherAlloc> &v)
-{
-  assign(v.begin(), v.end());
-
-  return *this;
-} // end vector_base::operator=()
-
-template<typename T, typename Alloc>
-  template<typename IteratorOrIntegralType>
-    void vector_base<T,Alloc>
-      ::init_dispatch(IteratorOrIntegralType n,
-                      IteratorOrIntegralType value,
-                      true_type)
-{
-  fill_init(n,value);
-} // end vector_base::init_dispatch()
-
-template<typename T, typename Alloc>
-  void vector_base<T,Alloc>
-    ::default_init(size_type n)
-{
-  if(n > 0)
-  {
-    m_storage.allocate(n);
-    m_size = n;
-
-    m_storage.default_construct_n(begin(), size());
-  } // end if
-} // end vector_base::default_init()
-
-template<typename T, typename Alloc>
-  void vector_base<T,Alloc>
-    ::fill_init(size_type n, const T &x)
-{
-  if(n > 0)
-  {
-    m_storage.allocate(n);
-    m_size = n;
-
-    m_storage.uninitialized_fill_n(begin(), size(), x);
-  } // end if
-} // end vector_base::fill_init()
-
-template<typename T, typename Alloc>
-  template<typename InputIterator>
-    void vector_base<T,Alloc>
-      ::init_dispatch(InputIterator first,
-                      InputIterator last,
-                      false_type)
-{
-  range_init(first, last);
-} // end vector_base::init_dispatch()
-
-template<typename T, typename Alloc>
-  template<typename InputIterator>
-    void vector_base<T,Alloc>
-      ::range_init(InputIterator first,
-                   InputIterator last)
-{
-  range_init(first, last,
-    typename thrust::iterator_traversal<InputIterator>::type());
-} // end vector_base::range_init()
-
-template<typename T, typename Alloc>
-  template<typename InputIterator>
-    void vector_base<T,Alloc>
-      ::range_init(InputIterator first,
-                   InputIterator last,
-                   thrust::incrementable_traversal_tag)
-{
-  for(; first != last; ++first)
-    push_back(*first);
-} // end vector_base::range_init()
-
-template<typename T, typename Alloc>
-  template<typename ForwardIterator>
-    void vector_base<T,Alloc>
-      ::range_init(ForwardIterator first,
-                   ForwardIterator last,
-                   thrust::random_access_traversal_tag)
-{
-  size_type new_size = thrust::distance(first, last);
-
-  allocate_and_copy(new_size, first, last, m_storage);
-  m_size    = new_size;
-} // end vector_base::range_init()
-
-template<typename T, typename Alloc>
-  template<typename InputIterator>
-    vector_base<T,Alloc>
-      ::vector_base(InputIterator first,
-                    InputIterator last)
-        :m_storage(),
-         m_size(0)
-{
-  // check the type of InputIterator: if it's an integral type,
-  // we need to interpret this call as (size_type, value_type)
-  typedef thrust::detail::is_integral<InputIterator> Integer;
-
-  init_dispatch(first, last, Integer());
-} // end vector_basee::vector_base()
-
-template<typename T, typename Alloc>
-  void vector_base<T,Alloc>
-    ::resize(size_type new_size)
-{
-  if(new_size < size())
-  {
-    iterator new_end = begin();
-    thrust::advance(new_end, new_size);
-    erase(new_end, end());
-  } // end if
-  else
-  {
-    append(new_size - size());
-  } // end else
-} // end vector_base::resize()
-
-template<typename T, typename Alloc>
-  void vector_base<T,Alloc>
-    ::resize(size_type new_size, const value_type &x)
-{
-  if(new_size < size())
-  {
-    iterator new_end = begin();
-    thrust::advance(new_end, new_size);
-    erase(new_end, end());
-  } // end if
-  else
-  {
-    insert(end(), new_size - size(), x);
-  } // end else
-} // end vector_base::resize()
-
-template<typename T, typename Alloc>
-  typename vector_base<T,Alloc>::size_type
-    vector_base<T,Alloc>
-      ::size(void) const
-{
-  return m_size;
-} // end vector_base::size()
-
-template<typename T, typename Alloc>
-  typename vector_base<T,Alloc>::size_type
-    vector_base<T,Alloc>
-      ::max_size(void) const
-{
-  return m_storage.max_size();
-} // end vector_base::max_size()
-
-template<typename T, typename Alloc>
-  void vector_base<T,Alloc>
-    ::reserve(size_type n)
-{
-  if(n > capacity())
-  {
-    allocate_and_copy(n, begin(), end(), m_storage);
-  } // end if
-} // end vector_base::reserve()
-
-template<typename T, typename Alloc>
-  typename vector_base<T,Alloc>::size_type
-    vector_base<T,Alloc>
-      ::capacity(void) const
-{
-  return m_storage.size();
-} // end vector_base::capacity()
-
-template<typename T, typename Alloc>
-  void vector_base<T,Alloc>
-    ::shrink_to_fit(void)
-{
-  // use the swap trick
-  vector_base(*this).swap(*this);
-} // end vector_base::shrink_to_fit()
-
-template<typename T, typename Alloc>
-  typename vector_base<T,Alloc>::reference
-    vector_base<T,Alloc>
-      ::operator[](const size_type n)
-{
-  return m_storage[n];
-} // end vector_base::operator[]
-
-template<typename T, typename Alloc>
-  typename vector_base<T,Alloc>::const_reference 
-    vector_base<T,Alloc>
-      ::operator[](const size_type n) const
-{
-  return m_storage[n];
-} // end vector_base::operator[]
-
-template<typename T, typename Alloc>
-  typename vector_base<T,Alloc>::iterator
-    vector_base<T,Alloc>
-      ::begin(void)
-{
-  return m_storage.begin();
-} // end vector_base::begin()
-
-template<typename T, typename Alloc>
-  typename vector_base<T,Alloc>::const_iterator
-    vector_base<T,Alloc>
-      ::begin(void) const
-{
-  return m_storage.begin();
-} // end vector_base::begin()
-
-template<typename T, typename Alloc>
-  typename vector_base<T,Alloc>::const_iterator
-    vector_base<T,Alloc>
-      ::cbegin(void) const
-{
-  return begin();
-} // end vector_base::cbegin()
-
-template<typename T, typename Alloc>
-  typename vector_base<T,Alloc>::reverse_iterator
-    vector_base<T,Alloc>
-      ::rbegin(void)
-{
-  return reverse_iterator(end());
-} // end vector_base::rbegin()
-
-template<typename T, typename Alloc>
-  typename vector_base<T,Alloc>::const_reverse_iterator
-    vector_base<T,Alloc>
-      ::rbegin(void) const
-{
-  return const_reverse_iterator(end());
-} // end vector_base::rbegin()
-
-template<typename T, typename Alloc>
-  typename vector_base<T,Alloc>::const_reverse_iterator
-    vector_base<T,Alloc>
-      ::crbegin(void) const
-{
-  return rbegin();
-} // end vector_base::crbegin()
-
-template<typename T, typename Alloc>
-  typename vector_base<T,Alloc>::iterator
-    vector_base<T,Alloc>
-      ::end(void)
-{
-  iterator result = begin();
-  thrust::advance(result, size());
-  return result;
-} // end vector_base::end()
-
-template<typename T, typename Alloc>
-  typename vector_base<T,Alloc>::const_iterator
-    vector_base<T,Alloc>
-      ::end(void) const
-{
-  const_iterator result = begin();
-  thrust::advance(result, size());
-  return result;
-} // end vector_base::end()
-
-template<typename T, typename Alloc>
-  typename vector_base<T,Alloc>::const_iterator
-    vector_base<T,Alloc>
-      ::cend(void) const
-{
-  return end();
-} // end vector_base::cend()
-
-template<typename T, typename Alloc>
-  typename vector_base<T,Alloc>::reverse_iterator
-    vector_base<T,Alloc>
-      ::rend(void)
-{
-  return reverse_iterator(begin());
-} // end vector_base::rend()
-
-template<typename T, typename Alloc>
-  typename vector_base<T,Alloc>::const_reverse_iterator
-    vector_base<T,Alloc>
-      ::rend(void) const
-{
-  return const_reverse_iterator(begin());
-} // end vector_base::rend()
-
-template<typename T, typename Alloc>
-  typename vector_base<T,Alloc>::const_reverse_iterator
-    vector_base<T,Alloc>
-      ::crend(void) const
-{
-  return rend();
-} // end vector_base::crend()
-
-template<typename T, typename Alloc>
-  typename vector_base<T,Alloc>::const_reference
-    vector_base<T,Alloc>
-      ::front(void) const
-{
-  return *begin();
-} // end vector_base::front()
-
-template<typename T, typename Alloc>
-  typename vector_base<T,Alloc>::reference
-    vector_base<T,Alloc>
-      ::front(void)
-{
-  return *begin();
-} // end vector_base::front()
-
-template<typename T, typename Alloc>
-  typename vector_base<T,Alloc>::const_reference
-    vector_base<T,Alloc>
-      ::back(void) const
-{
-  const_iterator ptr_to_back = end();
-  --ptr_to_back;
-  return *ptr_to_back;
-} // end vector_base::vector_base
-
-template<typename T, typename Alloc>
-  typename vector_base<T,Alloc>::reference
-    vector_base<T,Alloc>
-      ::back(void)
-{
-  iterator ptr_to_back = end();
-  --ptr_to_back;
-  return *ptr_to_back;
-} // end vector_base::vector_base
-
-template<typename T, typename Alloc>
-  typename vector_base<T,Alloc>::pointer
-    vector_base<T,Alloc>
-      ::data(void)
-{
-  return &front();
-} // end vector_base::data()
-
-template<typename T, typename Alloc>
-  typename vector_base<T,Alloc>::const_pointer
-    vector_base<T,Alloc>
-      ::data(void) const
-{
-  return &front();
-} // end vector_base::data()
-
-template<typename T, typename Alloc>
-  vector_base<T,Alloc>
-    ::~vector_base(void)
-{
-  // destroy every living thing
-  m_storage.destroy(begin(),end());
-} // end vector_base::~vector_base()
-
-template<typename T, typename Alloc>
-  void vector_base<T,Alloc>
-    ::clear(void)
-{
-  resize(0);
-} // end vector_base::~vector_dev()
-
-template<typename T, typename Alloc>
-  bool vector_base<T,Alloc>
-    ::empty(void) const
-{
-  return size() == 0;
-} // end vector_base::empty();
-
-template<typename T, typename Alloc>
-  void vector_base<T,Alloc>
-    ::push_back(const value_type &x)
-{
-  insert(end(), x);
-} // end vector_base::push_back()
-
-template<typename T, typename Alloc>
-  void vector_base<T,Alloc>
-    ::pop_back(void)
-{
-  iterator e = end();
-  iterator ptr_to_back = e;
-  --ptr_to_back;
-  m_storage.destroy(ptr_to_back, e);
-  --m_size;
-} // end vector_base::pop_back()
-
-template<typename T, typename Alloc>
-  typename vector_base<T,Alloc>::iterator vector_base<T,Alloc>
-    ::erase(iterator pos)
-{
-  iterator end = pos;
-  ++end;
-  return erase(pos,end);
-} // end vector_base::erase()
-
-template<typename T, typename Alloc>
-  typename vector_base<T,Alloc>::iterator vector_base<T,Alloc>
-    ::erase(iterator first, iterator last)
-{
-  // overlap copy the range [last,end()) to first
-  // XXX this copy only potentially overlaps
-  iterator i = thrust::detail::overlapped_copy(last, end(), first);
-
-  // destroy everything after i
-  m_storage.destroy(i, end());
-
-  // modify our size
-  m_size -= (last - first);
-
-  // return an iterator pointing to the position of the first element
-  // following the erased range
-  return first;
-} // end vector_base::erase()
-
-template<typename T, typename Alloc>
-  void vector_base<T,Alloc>
-    ::swap(vector_base &v)
-{
-  thrust::swap(m_storage,  v.m_storage);
-  thrust::swap(m_size,     v.m_size);
-} // end vector_base::swap()
-
-template<typename T, typename Alloc>
-  void vector_base<T,Alloc>
-    ::assign(size_type n, const T &x)
-{
-  fill_assign(n, x);
-} // end vector_base::assign()
-
-template<typename T, typename Alloc>
-  template<typename InputIterator>
-    void vector_base<T,Alloc>
-      ::assign(InputIterator first, InputIterator last)
-{
-  // we could have received assign(n, x), so disambiguate on the
-  // type of InputIterator
-  typedef typename thrust::detail::is_integral<InputIterator> integral;
-
-  assign_dispatch(first, last, integral());
-} // end vector_base::assign()
-
-template<typename T, typename Alloc>
-  typename vector_base<T,Alloc>::allocator_type
-    vector_base<T,Alloc>
-      ::get_allocator(void) const
-{
-  return m_storage.get_allocator();
-} // end vector_base::get_allocator()
-
-template<typename T, typename Alloc>
-  typename vector_base<T,Alloc>::iterator
-    vector_base<T,Alloc>
-      ::insert(iterator position, const T &x)
-{
-  // find the index of the insertion
-  size_type index = thrust::distance(begin(), position);
-
-  // make the insertion
-  insert(position, 1, x);
-
-  // return an iterator pointing back to position
-  iterator result = begin();
-  thrust::advance(result, index);
-  return result;
-} // end vector_base::insert()
-
-template<typename T, typename Alloc>
-  void vector_base<T,Alloc>
-    ::insert(iterator position, size_type n, const T &x)
-{
-  fill_insert(position, n, x);
-} // end vector_base::insert()
-
-template<typename T, typename Alloc>
-  template<typename InputIterator>
-    void vector_base<T,Alloc>
-      ::insert(iterator position, InputIterator first, InputIterator last)
-{
-  // we could have received insert(position, n, x), so disambiguate on the
-  // type of InputIterator
-  typedef typename thrust::detail::is_integral<InputIterator> integral;
-
-  insert_dispatch(position, first, last, integral());
-} // end vector_base::insert()
-
-template<typename T, typename Alloc>
-  template<typename InputIterator>
-    void vector_base<T,Alloc>
-      ::assign_dispatch(InputIterator first, InputIterator last, false_type)
-{
-  range_assign(first, last);
-} // end vector_base::assign_dispatch()
-
-template<typename T, typename Alloc>
-  template<typename Integral>
-    void vector_base<T,Alloc>
-      ::assign_dispatch(Integral n, Integral x, true_type)
-{
-  fill_assign(n, x);
-} // end vector_base::assign_dispatch()
-
-template<typename T, typename Alloc>
-  template<typename InputIterator>
-    void vector_base<T,Alloc>
-      ::insert_dispatch(iterator position, InputIterator first, InputIterator last, false_type)
-{
-  copy_insert(position, first, last);
-} // end vector_base::insert_dispatch()
-
-template<typename T, typename Alloc>
-  template<typename Integral>
-    void vector_base<T,Alloc>
-      ::insert_dispatch(iterator position, Integral n, Integral x, true_type)
-{
-  fill_insert(position, n, x);
-} // end vector_base::insert_dispatch()
-
-template<typename T, typename Alloc>
-  template<typename ForwardIterator>
-    void vector_base<T,Alloc>
-      ::copy_insert(iterator position,
-                    ForwardIterator first,
-                    ForwardIterator last)
-{
-  if(first != last)
-  {
-    // how many new elements will we create?
-    const size_type num_new_elements = thrust::distance(first, last);
-    if(capacity() - size() >= num_new_elements)
-    {
-      // we've got room for all of them
-      // how many existing elements will we displace?
-      const size_type num_displaced_elements = end() - position;
-      iterator old_end = end();
-
-      if(num_displaced_elements > num_new_elements)
-      {
-        // construct copy n displaced elements to new elements
-        // following the insertion
-        m_storage.uninitialized_copy(end() - num_new_elements, end(), end());
-
-        // extend the size
-        m_size += num_new_elements;
-
-        // copy num_displaced_elements - num_new_elements elements to existing elements
-        // this copy overlaps
-        const size_type copy_length = (old_end - num_new_elements) - position;
-        thrust::detail::overlapped_copy(position, old_end - num_new_elements, old_end - copy_length);
-
-        // finally, copy the range to the insertion point
-        thrust::copy(first, last, position);
-      } // end if
-      else
-      {
-        ForwardIterator mid = first;
-        thrust::advance(mid, num_displaced_elements);
-
-        // construct copy new elements at the end of the vector
-        m_storage.uninitialized_copy(mid, last, end());
-
-        // extend the size
-        m_size += num_new_elements - num_displaced_elements;
-
-        // construct copy the displaced elements
-        m_storage.uninitialized_copy(position, old_end, end());
-
-        // extend the size
-        m_size += num_displaced_elements;
-
-        // copy to elements which already existed
-        thrust::copy(first, mid, position);
-      } // end else
-    } // end if
-    else
-    {
-      const size_type old_size = size();
-
-      // compute the new capacity after the allocation
-      size_type new_capacity = old_size + thrust::max THRUST_PREVENT_MACRO_SUBSTITUTION (old_size, num_new_elements);
-
-      // allocate exponentially larger new storage
-      new_capacity = thrust::max THRUST_PREVENT_MACRO_SUBSTITUTION <size_type>(new_capacity, 2 * capacity());
-
-      // do not exceed maximum storage
-      new_capacity = thrust::min THRUST_PREVENT_MACRO_SUBSTITUTION <size_type>(new_capacity, max_size());
-
-      if(new_capacity > max_size())
-      {
-        throw std::length_error("insert(): insertion exceeds max_size().");
-      } // end if
-
-      storage_type new_storage(new_capacity);
-
-      // record how many constructors we invoke in the try block below
-      iterator new_end = new_storage.begin();
-
-      try
-      {
-        // construct copy elements before the insertion to the beginning of the newly
-        // allocated storage
-        new_end = m_storage.uninitialized_copy(begin(), position, new_storage.begin());
-
-        // construct copy elements to insert
-        new_end = m_storage.uninitialized_copy(first, last, new_end);
-
-        // construct copy displaced elements from the old storage to the new storage
-        // remember [position, end()) refers to the old storage
-        new_end = m_storage.uninitialized_copy(position, end(), new_end);
-      } // end try
-      catch(...)
-      {
-        // something went wrong, so destroy & deallocate the new storage 
-        m_storage.destroy(new_storage.begin(), new_end);
-        new_storage.deallocate();
-
-        // rethrow
-        throw;
-      } // end catch
-
-      // call destructors on the elements in the old storage
-      m_storage.destroy(begin(), end());
-
-      // record the vector's new state
-      m_storage.swap(new_storage);
-      m_size = old_size + num_new_elements;
-    } // end else
-  } // end if
-} // end vector_base::copy_insert()
-
-template<typename T, typename Alloc>
-  void vector_base<T,Alloc>
-    ::append(size_type n)
-{
-  if(n != 0)
-  {
-    if(capacity() - size() >= n)
-    {
-      // we've got room for all of them
-
-      // default construct new elements at the end of the vector
-      m_storage.default_construct_n(end(), n);
-
-      // extend the size
-      m_size += n;
-    } // end if
-    else
-    {
-      const size_type old_size = size();
-
-      // compute the new capacity after the allocation
-      size_type new_capacity = old_size + thrust::max THRUST_PREVENT_MACRO_SUBSTITUTION (old_size, n);
-
-      // allocate exponentially larger new storage
-      new_capacity = thrust::max THRUST_PREVENT_MACRO_SUBSTITUTION <size_type>(new_capacity, 2 * capacity());
-
-      // do not exceed maximum storage
-      new_capacity = thrust::min THRUST_PREVENT_MACRO_SUBSTITUTION <size_type>(new_capacity, max_size());
-
-      // create new storage
-      storage_type new_storage(new_capacity);
-
-      // record how many constructors we invoke in the try block below
-      iterator new_end = new_storage.begin();
-
-      try
-      {
-        // construct copy all elements into the newly allocated storage
-        new_end = m_storage.uninitialized_copy(begin(), end(), new_storage.begin());
-
-        // construct new elements to insert
-        m_storage.default_construct_n(new_end, n);
-        new_end += n;
-      } // end try
-      catch(...)
-      {
-        // something went wrong, so destroy & deallocate the new storage 
-        m_storage.destroy(new_storage.begin(), new_end);
-        new_storage.deallocate();
-
-        // rethrow
-        throw;
-      } // end catch
-
-      // call destructors on the elements in the old storage
-      m_storage.destroy(begin(), end());
-
-      // record the vector's new state
-      m_storage.swap(new_storage);
-      m_size    = old_size + n;
-    } // end else
-  } // end if
-} // end vector_base::append()
-
-template<typename T, typename Alloc>
-  void vector_base<T,Alloc>
-    ::fill_insert(iterator position, size_type n, const T &x)
-{
-  if(n != 0)
-  {
-    if(capacity() - size() >= n)
-    {
-      // we've got room for all of them
-      // how many existing elements will we displace?
-      const size_type num_displaced_elements = end() - position;
-      iterator old_end = end();
-
-      if(num_displaced_elements > n)
-      {
-        // construct copy n displaced elements to new elements
-        // following the insertion
-        m_storage.uninitialized_copy(end() - n, end(), end());
-
-        // extend the size
-        m_size += n;
-
-        // copy num_displaced_elements - n elements to existing elements
-        // this copy overlaps
-        const size_type copy_length = (old_end - n) - position;
-        thrust::detail::overlapped_copy(position, old_end - n, old_end - copy_length);
-
-        // finally, fill the range to the insertion point
-        thrust::fill_n(position, n, x);
-      } // end if
-      else
-      {
-        // construct new elements at the end of the vector
-        m_storage.uninitialized_fill_n(end(), n - num_displaced_elements, x);
-
-        // extend the size
-        m_size += n - num_displaced_elements;
-
-        // construct copy the displaced elements
-        m_storage.uninitialized_copy(position, old_end, end());
-
-        // extend the size
-        m_size += num_displaced_elements;
-
-        // fill to elements which already existed
-        thrust::fill(position, old_end, x);
-      } // end else
-    } // end if
-    else
-    {
-      const size_type old_size = size();
-
-      // compute the new capacity after the allocation
-      size_type new_capacity = old_size + thrust::max THRUST_PREVENT_MACRO_SUBSTITUTION (old_size, n);
-
-      // allocate exponentially larger new storage
-      new_capacity = thrust::max THRUST_PREVENT_MACRO_SUBSTITUTION <size_type>(new_capacity, 2 * capacity());
-
-      // do not exceed maximum storage
-      new_capacity = thrust::min THRUST_PREVENT_MACRO_SUBSTITUTION <size_type>(new_capacity, max_size());
-
-      if(new_capacity > max_size())
-      {
-        throw std::length_error("insert(): insertion exceeds max_size().");
-      } // end if
-
-      storage_type new_storage(new_capacity);
-
-      // record how many constructors we invoke in the try block below
-      iterator new_end = new_storage.begin();
-
-      try
-      {
-        // construct copy elements before the insertion to the beginning of the newly
-        // allocated storage
-        new_end = m_storage.uninitialized_copy(begin(), position, new_storage.begin());
-
-        // construct new elements to insert
-        m_storage.uninitialized_fill_n(new_end, n, x);
-        new_end += n;
-
-        // construct copy displaced elements from the old storage to the new storage
-        // remember [position, end()) refers to the old storage
-        new_end = m_storage.uninitialized_copy(position, end(), new_end);
-      } // end try
-      catch(...)
-      {
-        // something went wrong, so destroy & deallocate the new storage 
-        m_storage.destroy(new_storage.begin(), new_end);
-        new_storage.deallocate();
-
-        // rethrow
-        throw;
-      } // end catch
-
-      // call destructors on the elements in the old storage
-      m_storage.destroy(begin(), end());
-
-      // record the vector's new state
-      m_storage.swap(new_storage);
-      m_size    = old_size + n;
-    } // end else
-  } // end if
-} // end vector_base::fill_insert()
-
-template<typename T, typename Alloc>
-  template<typename InputIterator>
-    void vector_base<T,Alloc>
-      ::range_assign(InputIterator first,
-                     InputIterator last)
-{
-  // dispatch on traversal
-  range_assign(first, last,
-    typename thrust::iterator_traversal<InputIterator>::type());
-} // end range_assign()
-
-template<typename T, typename Alloc>
-  template<typename InputIterator>
-    void vector_base<T,Alloc>
-      ::range_assign(InputIterator first,
-                     InputIterator last,
-                     thrust::incrementable_traversal_tag)
-{
-  iterator current(begin());
-
-  // assign to elements which already exist
-  for(; first != last && current != end(); ++current, ++first)
-  {
-    *current = *first;
-  } // end for
-  
-  // either just the input was exhausted or both
-  // the input and vector elements were exhausted
-  if(first == last)
-  {
-    // if we exhausted the input, erase leftover elements
-    erase(current, end());
-  } // end if
-  else
-  {
-    // insert the rest of the input at the end of the vector
-    insert(end(), first, last);
-  } // end else
-} // end vector_base::range_assign()
-
-template<typename T, typename Alloc>
-  template<typename RandomAccessIterator>
-    void vector_base<T,Alloc>
-      ::range_assign(RandomAccessIterator first,
-                     RandomAccessIterator last,
-                     thrust::random_access_traversal_tag)
-{
-  const size_type n = thrust::distance(first, last);
-
-  if(n > capacity())
-  {
-    storage_type new_storage;
-    allocate_and_copy(n, first, last, new_storage);
-
-    // call destructors on the elements in the old storage
-    m_storage.destroy(begin(), end());
-
-    // record the vector's new state
-    m_storage.swap(new_storage);
-    m_size = n;
-  } // end if
-  else if(size() >= n)
-  {
-    // we can already accomodate the new range
-    iterator new_end = thrust::copy(first, last, begin());
-
-    // destroy the elements we don't need
-    m_storage.destroy(new_end, end());
-
-    // update size
-    m_size = n;
-  } // end else if
-  else
-  {
-    // range fits inside allocated storage, but some elements
-    // have not been constructed yet
-    
-    // XXX TODO we could possibly implement this with one call
-    // to transform rather than copy + uninitialized_copy
-
-    // copy to elements which already exist
-    RandomAccessIterator mid = first;
-    thrust::advance(mid, size());
-    thrust::copy(first, mid, begin());
-
-    // uninitialize_copy to elements which must be constructed
-    m_storage.uninitialized_copy(mid, last, end());
-
-    // update size
-    m_size = n;
-  } // end else
-} // end vector_base::assign()
-
-template<typename T, typename Alloc>
-  void vector_base<T,Alloc>
-    ::fill_assign(size_type n, const T &x)
-{
-  if(n > capacity())
-  {
-    // XXX we should also include a copy of the allocator:
-    // vector_base<T,Alloc> temp(n, x, get_allocator());
-    vector_base<T,Alloc> temp(n, x);
-    temp.swap(*this);
-  } // end if
-  else if(n > size())
-  {
-    // fill to existing elements
-    thrust::fill(begin(), end(), x);
-
-    // construct uninitialized elements
-    m_storage.uninitialized_fill_n(end(), n - size(), x);
-
-    // adjust size
-    m_size += (n - size());
-  } // end else if
-  else
-  {
-    // fill to existing elements
-    iterator new_end = thrust::fill_n(begin(), n, x);
-
-    // erase the elements after the fill
-    erase(new_end, end());
-  } // end else
-} // end vector_base::fill_assign()
-
-template<typename T, typename Alloc>
-  template<typename ForwardIterator>
-    void vector_base<T,Alloc>
-      ::allocate_and_copy(size_type requested_size,
-                          ForwardIterator first, ForwardIterator last,
-                          storage_type &new_storage)
-{
-  if(requested_size == 0)
-  {
-    new_storage.deallocate();
-    return;
-  } // end if
-
-  // allocate exponentially larger new storage
-  size_type allocated_size = thrust::max<size_type>(requested_size, 2 * capacity());
-
-  // do not exceed maximum storage
-  allocated_size = thrust::min<size_type>(allocated_size, max_size());
-
-  if(requested_size > allocated_size)
-  {
-    throw std::length_error("assignment exceeds max_size().");
-  } // end if
-
-  new_storage.allocate(allocated_size);
-
-  try
-  {
-    // construct the range to the newly allocated storage
-    m_storage.uninitialized_copy(first, last, new_storage.begin());
-  } // end try
-  catch(...)
-  {
-    // something went wrong, so destroy & deallocate the new storage 
-    // XXX seems like this destroys too many elements -- should just be last - first instead of requested_size
-    iterator new_storage_end = new_storage.begin();
-    thrust::advance(new_storage_end, requested_size);
-    m_storage.destroy(new_storage.begin(), new_storage_end);
-    new_storage.deallocate();
-
-    // rethrow
-    throw;
-  } // end catch
-} // end vector_base::allocate_and_copy()
-
-
-} // end detail
-
-template<typename T, typename Alloc>
-  void swap(detail::vector_base<T,Alloc> &a,
-            detail::vector_base<T,Alloc> &b)
-{
-  a.swap(b);
-} // end swap()
-
-
-
-namespace detail
-{
-    
-// iterator tags match
-template <typename InputIterator1, typename InputIterator2>
-bool vector_equal(InputIterator1 first1, InputIterator1 last1,
-                  InputIterator2 first2,
-                  thrust::detail::true_type)
-{
-  return thrust::equal(first1, last1, first2);
-}
-
-// iterator tags differ
-template <typename InputIterator1, typename InputIterator2>
-bool vector_equal(InputIterator1 first1, InputIterator1 last1,
-                  InputIterator2 first2,
-                  thrust::detail::false_type)
-{
-  typename thrust::iterator_difference<InputIterator1>::type n = thrust::distance(first1,last1);
-
-  typedef typename thrust::iterator_system<InputIterator1>::type FromSystem1;
-  typedef typename thrust::iterator_system<InputIterator2>::type FromSystem2;
-
-  // bring both ranges to the host system
-  // note that these copies are no-ops if the range is already convertible to the host system
-  FromSystem1 from_system1;
-  FromSystem2 from_system2;
-  thrust::host_system_tag to_system;
-  thrust::detail::move_to_system<InputIterator1, FromSystem1, thrust::host_system_tag> rng1(from_system1, to_system, first1, last1);
-  thrust::detail::move_to_system<InputIterator2, FromSystem2, thrust::host_system_tag> rng2(from_system2, to_system, first2, first2 + n);
-
-  return thrust::equal(rng1.begin(), rng1.end(), rng2.begin());
-}
-
-template <typename InputIterator1, typename InputIterator2>
-bool vector_equal(InputIterator1 first1, InputIterator1 last1,
-                  InputIterator2 first2)
-{
-  typedef typename thrust::iterator_system<InputIterator1>::type system1;
-  typedef typename thrust::iterator_system<InputIterator2>::type system2;
-
-  // dispatch on the sameness of the two systems
-  return vector_equal(first1, last1, first2,
-    thrust::detail::is_same<system1,system2>());
-}
-
-} // end namespace detail
-
-
-
-
-template<typename T1, typename Alloc1,
-         typename T2, typename Alloc2>
-bool operator==(const detail::vector_base<T1,Alloc1>& lhs,
-                const detail::vector_base<T2,Alloc2>& rhs)
-{
-    return lhs.size() == rhs.size() && detail::vector_equal(lhs.begin(), lhs.end(), rhs.begin());
-}
-    
-template<typename T1, typename Alloc1,
-         typename T2, typename Alloc2>
-bool operator==(const detail::vector_base<T1,Alloc1>& lhs,
-                const std::vector<T2,Alloc2>&         rhs)
-{
-    return lhs.size() == rhs.size() && detail::vector_equal(lhs.begin(), lhs.end(), rhs.begin());
-}
-
-template<typename T1, typename Alloc1,
-         typename T2, typename Alloc2>
-bool operator==(const std::vector<T1,Alloc1>&         lhs,
-                const detail::vector_base<T2,Alloc2>& rhs)
-{
-    return lhs.size() == rhs.size() && detail::vector_equal(lhs.begin(), lhs.end(), rhs.begin());
-}
-
-template<typename T1, typename Alloc1,
-         typename T2, typename Alloc2>
-bool operator!=(const detail::vector_base<T1,Alloc1>& lhs,
-                const detail::vector_base<T2,Alloc2>& rhs)
-{
-    return !(lhs == rhs);
-}
-    
-template<typename T1, typename Alloc1,
-         typename T2, typename Alloc2>
-bool operator!=(const detail::vector_base<T1,Alloc1>& lhs,
-                const std::vector<T2,Alloc2>&         rhs)
-{
-    return !(lhs == rhs);
-}
-
-template<typename T1, typename Alloc1,
-         typename T2, typename Alloc2>
-bool operator!=(const std::vector<T1,Alloc1>&         lhs,
-                const detail::vector_base<T2,Alloc2>& rhs)
-{
-    return !(lhs == rhs);
-}
-
-} // end thrust
-
diff --git a/compat/thrust/device_allocator.h b/compat/thrust/device_allocator.h
deleted file mode 100644
index a5462d1a28..0000000000
--- a/compat/thrust/device_allocator.h
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file device_allocator.h
- *  \brief An allocator which creates new elements in device memory
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/device_new_allocator.h>
-#include <limits>
-#include <stdexcept>
-
-namespace thrust
-{
-
-/*! \addtogroup memory_management_classes Memory Management Classes
- *  \{
- */
-
-template<typename T> class device_allocator;
-
-/*! \p device_allocator<void> is a device memory allocator.
- *  This class is a specialization for \c void.
- *
- *  \see device_ptr
- *  \see http://www.sgi.com/tech/stl/Allocators.html
- */
-template<>
-  class device_allocator<void>
-{
-  public:
-    /*! Type of element allocated, \c void. */
-    typedef void                              value_type;
-
-    /*! Pointer to allocation, \c device_ptr<void>. */
-    typedef device_ptr<void>                  pointer;
-
-    /*! \c const pointer to allocation, \c device_ptr<const void>. */
-    typedef device_ptr<const void>            const_pointer;
-
-    /*! Type of allocation size, \c std::size_t. */
-    typedef std::size_t                       size_type;
-
-    /*! Type of allocation difference, \c pointer::difference_type. */
-    typedef pointer::difference_type difference_type;
-
-    /*! The \p rebind metafunction provides the type of a \p device_allocator
-     *  instantiated with another type.
-     *
-     *  \tparam U The other type to use for instantiation.
-     */
-    template<typename U>
-      struct rebind
-    {
-      /*! The typedef \p other gives the type of the rebound \p device_allocator.
-       */
-      typedef device_allocator<U> other;
-    }; // end rebind
-}; // end device_allocator<void>
-
-/*! \p device_allocator is a device memory allocator.
- *  This implementation inherits from \p device_new_allocator.
- *
- *  \see device_ptr
- *  \see device_new_allocator
- *  \see http://www.sgi.com/tech/stl/Allocators.html
- */
-template<typename T>
-  class device_allocator
-    : public device_new_allocator<T>
-{
-  public:
-    /*! The \p rebind metafunction provides the type of a \p device_allocator
-     *  instantiated with another type.
-     *
-     *  \tparam U The other type to use for instantiation.
-     */
-    template<typename U>
-      struct rebind
-    {
-      /*! The typedef \p other gives the type of the rebound \p device_allocator.
-       */
-      typedef device_allocator<U> other;
-    }; // end rebind
-
-    /*! No-argument constructor has no effect.
-     */
-    __host__ __device__
-    inline device_allocator() {}
-
-    /*! Copy constructor has no effect.
-     */
-    __host__ __device__
-    inline device_allocator(device_allocator const&) {}
-
-    /*! Constructor from other \p allocator has no effect.
-     */
-    template<typename U>
-    __host__ __device__
-    inline device_allocator(device_allocator<U> const&) {}
-}; // end device_allocator
-
-/*! \}
- */
-
-} // end thrust
-
diff --git a/compat/thrust/device_delete.h b/compat/thrust/device_delete.h
deleted file mode 100644
index 1df3bb6f46..0000000000
--- a/compat/thrust/device_delete.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file device_delete.h
- *  \brief Deletes variables in device memory
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/device_ptr.h>
-
-namespace thrust
-{
-
-/*! \addtogroup deallocation_functions Deallocation Functions
- *  \ingroup memory_management_functions
- *  \{
- */
-
-/*! \p device_delete deletes a \p device_ptr allocated with
- *  \p device_new.
- *
- *  \param ptr The \p device_ptr to delete, assumed to have
- *         been allocated with \p device_new.
- *  \param n The number of objects to destroy at \p ptr. Defaults to \c 1
- *         similar to \p device_new.
- *
- *  \see device_ptr
- *  \see device_new
- */
-template<typename T>
-  inline void device_delete(thrust::device_ptr<T> ptr,
-                            const size_t n = 1);
-
-/*! \}
- */
-
-} // end thrust
-
-#include <thrust/detail/device_delete.inl>
-
diff --git a/compat/thrust/device_free.h b/compat/thrust/device_free.h
deleted file mode 100644
index a734418e58..0000000000
--- a/compat/thrust/device_free.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file device_free.h
- *  \brief Deallocates storage allocated by \p device_malloc
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/device_ptr.h>
-
-namespace thrust
-{
-
-/*! \addtogroup deallocation_functions Deallocation Functions
- *  \ingroup memory_management_functions
- *  \{
- */
-
-/*! \p device_free deallocates memory allocated by the function \p device_malloc.
- *
- *  \param ptr A \p device_ptr pointing to memory to be deallocated.
- *
- *  The following code snippet demonstrates how to use \p device_free to
- *  deallocate memory allocated by \p device_malloc.
- *
- *  \code
- *  #include <thrust/device_malloc.h>
- *  #include <thrust/device_free.h>
- *  ...
- *  // allocate some integers with device_malloc
- *  const int N = 100;
- *  thrust::device_ptr<int> int_array = thrust::device_malloc<int>(N);
- *
- *  // manipulate integers
- *  ...
- *
- *  // deallocate with device_free
- *  thrust::device_free(int_array);
- *  \endcode
- *
- *  \see device_ptr
- *  \see device_malloc
- */
-inline void device_free(thrust::device_ptr<void> ptr);
-
-/*! \}
- */
-
-} // end thrust
-
-#include <thrust/detail/device_free.inl>
-
diff --git a/compat/thrust/device_malloc.h b/compat/thrust/device_malloc.h
deleted file mode 100644
index a3b07234f9..0000000000
--- a/compat/thrust/device_malloc.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file device_malloc.h
- *  \brief Allocates storage in device memory
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/device_ptr.h>
-#include <cstddef> // for std::size_t
-
-namespace thrust
-{
-
-/*! \addtogroup allocation_functions Allocation Functions
- *  \ingroup memory_management_functions
- *  \{
- */
-
-/*! This version of \p device_malloc allocates sequential device storage
- *  for bytes.
- *
- *  \param n The number of bytes to allocate sequentially
- *           in device memory.
- *  \return A \p device_ptr to the newly allocated memory.
- *
- *  The following code snippet demonstrates how to use \p device_malloc to
- *  allocate a range of device memory.
- *
- *  \code
- *  #include <thrust/device_malloc.h>
- *  #include <thrust/device_free.h>
- *  ...
- *  // allocate some memory with device_malloc
- *  const int N = 100;
- *  thrust::device_ptr<void> void_ptr = thrust::device_malloc(N);
- *
- *  // manipulate memory
- *  ...
- *
- *  // deallocate with device_free
- *  thrust::device_free(void_ptr);
- *  \endcode
- *
- *  \see device_ptr
- *  \see device_free
- */
-inline thrust::device_ptr<void> device_malloc(const std::size_t n);
-
-/*! This version of \p device_malloc allocates sequential device storage for
- *  new objects of the given type.
- *
- *  \param n The number of objects of type T to allocate
- *           sequentially in device memory.
- *  \return A \p device_ptr to the newly allocated memory.
- *
- *  The following code snippet demonstrates how to use \p device_malloc to
- *  allocate a range of device memory.
- *
- *  \code
- *  #include <thrust/device_malloc.h>
- *  #include <thrust/device_free.h>
- *  ...
- *  // allocate some integers with device_malloc
- *  const int N = 100;
- *  thrust::device_ptr<int> int_array = thrust::device_malloc<int>(N);
- *
- *  // manipulate integers
- *  ...
- *
- *  // deallocate with device_free
- *  thrust::device_free(int_array);
- *  \endcode
- *
- *  \see device_ptr
- *  \see device_free
- */
-template<typename T>
-  inline thrust::device_ptr<T> device_malloc(const std::size_t n);
-
-/*! \}
- */
-
-} // end thrust
-
-#include <thrust/detail/device_malloc.inl>
-
diff --git a/compat/thrust/device_malloc_allocator.h b/compat/thrust/device_malloc_allocator.h
deleted file mode 100644
index 404a6d297a..0000000000
--- a/compat/thrust/device_malloc_allocator.h
+++ /dev/null
@@ -1,174 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file device_malloc_allocator.h
- *  \brief An allocator which allocates storage with \p device_malloc
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/device_ptr.h>
-#include <thrust/device_reference.h>
-#include <thrust/device_malloc.h>
-#include <thrust/device_free.h>
-#include <limits>
-#include <stdexcept>
-
-namespace thrust
-{
-
-// forward declarations to WAR circular #includes
-template<typename> class device_ptr;
-template<typename T> device_ptr<T> device_malloc(const std::size_t n);
-
-/*! \addtogroup memory_management Memory Management
- *  \addtogroup memory_management_classes Memory Management Classes
- *  \ingroup memory_management
- *  \{
- */
-
-/*! \p device_malloc_allocator is a device memory allocator that employs the
- *  \p device_malloc function for allocation.
- *
- *  \see device_malloc
- *  \see device_ptr
- *  \see http://www.sgi.com/tech/stl/Allocators.html
- */
-template<typename T>
-  class device_malloc_allocator
-{
-  public:
-    /*! Type of element allocated, \c T. */
-    typedef T                                 value_type;
-
-    /*! Pointer to allocation, \c device_ptr<T>. */
-    typedef device_ptr<T>                     pointer;
-
-    /*! \c const pointer to allocation, \c device_ptr<const T>. */
-    typedef device_ptr<const T>               const_pointer;
-
-    /*! Reference to allocated element, \c device_reference<T>. */
-    typedef device_reference<T>               reference;
-
-    /*! \c const reference to allocated element, \c device_reference<const T>. */
-    typedef device_reference<const T>         const_reference;
-
-    /*! Type of allocation size, \c std::size_t. */
-    typedef std::size_t                       size_type;
-
-    /*! Type of allocation difference, \c pointer::difference_type. */
-    typedef typename pointer::difference_type difference_type;
-
-    /*! The \p rebind metafunction provides the type of a \p device_malloc_allocator
-     *  instantiated with another type.
-     *
-     *  \tparam U The other type to use for instantiation.
-     */
-    template<typename U>
-      struct rebind
-    {
-      /*! The typedef \p other gives the type of the rebound \p device_malloc_allocator.
-       */
-      typedef device_malloc_allocator<U> other;
-    }; // end rebind
-
-    /*! No-argument constructor has no effect. */
-    __host__ __device__
-    inline device_malloc_allocator() {}
-
-    /*! No-argument destructor has no effect. */
-    __host__ __device__
-    inline ~device_malloc_allocator() {}
-
-    /*! Copy constructor has no effect. */
-    __host__ __device__
-    inline device_malloc_allocator(device_malloc_allocator const&) {}
-
-    /*! Constructor from other \p device_malloc_allocator has no effect. */
-    template<typename U>
-    __host__ __device__
-    inline device_malloc_allocator(device_malloc_allocator<U> const&) {}
-
-    /*! Returns the address of an allocated object.
-     *  \return <tt>&r</tt>.
-     */
-    __host__ __device__
-    inline pointer address(reference r) { return &r; }
-    
-    /*! Returns the address an allocated object.
-     *  \return <tt>&r</tt>.
-     */
-    __host__ __device__
-    inline const_pointer address(const_reference r) { return &r; }
-
-    /*! Allocates storage for \p cnt objects.
-     *  \param cnt The number of objects to allocate.
-     *  \return A \p pointer to uninitialized storage for \p cnt objects.
-     *  \note Memory allocated by this function must be deallocated with \p deallocate.
-     */
-    __host__
-    inline pointer allocate(size_type cnt,
-                            const_pointer = const_pointer(static_cast<T*>(0)))
-    {
-      if(cnt > this->max_size())
-      {
-        throw std::bad_alloc();
-      } // end if
-
-      return pointer(device_malloc<T>(cnt));
-    } // end allocate()
-
-    /*! Deallocates storage for objects allocated with \p allocate.
-     *  \param p A \p pointer to the storage to deallocate.
-     *  \param cnt The size of the previous allocation.
-     *  \note Memory deallocated by this function must previously have been
-     *        allocated with \p allocate.
-     */
-    __host__
-    inline void deallocate(pointer p, size_type cnt)
-    {
-      device_free(p);
-    } // end deallocate()
-
-    /*! Returns the largest value \c n for which <tt>allocate(n)</tt> might succeed.
-     *  \return The largest value \c n for which <tt>allocate(n)</tt> might succeed.
-     */
-    inline size_type max_size() const
-    {
-      return (std::numeric_limits<size_type>::max)() / sizeof(T);
-    } // end max_size()
-
-    /*! Compares against another \p device_malloc_allocator for equality.
-     *  \return \c true
-     */
-    __host__ __device__
-    inline bool operator==(device_malloc_allocator const&) { return true; }
-
-    /*! Compares against another \p device_malloc_allocator for inequality.
-     *  \return \c false
-     */
-    __host__ __device__
-    inline bool operator!=(device_malloc_allocator const &a) {return !operator==(a); }
-}; // end device_malloc_allocator
-
-/*! \}
- */
-
-} // end thrust
-
-
diff --git a/compat/thrust/device_new.h b/compat/thrust/device_new.h
deleted file mode 100644
index 001d476896..0000000000
--- a/compat/thrust/device_new.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file device_new.h
- *  \brief Constructs new elements in device memory
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// #include this for size_t
-#include <cstddef>
-#include <thrust/device_ptr.h>
-
-namespace thrust
-{
-
-/*!
- *  \addtogroup allocation_functions Allocation Functions
- *  \{
- */
-
-/*! \p device_new implements the placement \c new operator for types
- *  resident in device memory. \p device_new calls <tt>T</tt>'s null
- *  constructor on a array of objects in device memory.
- *  No memory is allocated by this function.
- *
- *  \param  p A \p device_ptr to a region of device memory into which
- *          to construct one or many <tt>T</tt>s.
- *  \param  n The number of objects to construct at \p p.
- *  \return p, casted to <tt>T</tt>'s type.
- *
- *  \see device_ptr
- */
-template <typename T>
-  device_ptr<T> device_new(device_ptr<void> p,
-                           const size_t n = 1);
-
-/*! \p device_new implements the placement new operator for types
- *  resident in device memory. \p device_new calls <tt>T</tt>'s copy
- *  constructor on a array of objects in device memory. No memory is
- *  allocated by this function.
- *
- *  \param  p A \p device_ptr to a region of device memory into which to
- *          construct one or many <tt>T</tt>s.
- *  \param exemplar The value from which to copy.
- *  \param  n The number of objects to construct at \p p.
- *  \return p, casted to <tt>T</tt>'s type.
- *
- *  \see device_ptr
- *  \see fill
- */
-template <typename T>
-  device_ptr<T> device_new(device_ptr<void> p,
-                           const T &exemplar,
-                           const size_t n = 1);
-
-/*! \p device_new implements the new operator for types resident in device memory.
- *  It allocates device memory large enough to hold \p n new objects of type \c T.
- *
- *  \param n The number of objects to allocate. Defaults to \c 1.
- *  \return A \p device_ptr to the newly allocated region of device memory.
- */
-template <typename T>
-  device_ptr<T> device_new(const size_t n = 1);
-
-/*! \}
- */
-
-} // end thrust
-
-#include <thrust/detail/device_new.inl>
-
diff --git a/compat/thrust/device_new_allocator.h b/compat/thrust/device_new_allocator.h
deleted file mode 100644
index 527d1fd7b5..0000000000
--- a/compat/thrust/device_new_allocator.h
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file device_new_allocator.h
- *  \brief An allocator which allocates storage with \p device_new
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/device_ptr.h>
-#include <thrust/device_reference.h>
-#include <thrust/device_new.h>
-#include <thrust/device_delete.h>
-#include <limits>
-#include <stdexcept>
-
-namespace thrust
-{
-
-/*! \addtogroup memory_management Memory Management
- *  \addtogroup memory_management_classes Memory Management Classes
- *  \ingroup memory_management
- *  \{
- */
-
-/*! \p device_new_allocator is a device memory allocator that employs the
- *  \p device_new function for allocation.
- *
- *  \see device_new
- *  \see device_ptr
- *  \see http://www.sgi.com/tech/stl/Allocators.html
- */
-template<typename T>
-  class device_new_allocator
-{
-  public:
-    /*! Type of element allocated, \c T. */
-    typedef T                                 value_type;
-
-    /*! Pointer to allocation, \c device_ptr<T>. */
-    typedef device_ptr<T>                     pointer;
-
-    /*! \c const pointer to allocation, \c device_ptr<const T>. */
-    typedef device_ptr<const T>               const_pointer;
-
-    /*! Reference to allocated element, \c device_reference<T>. */
-    typedef device_reference<T>               reference;
-
-    /*! \c const reference to allocated element, \c device_reference<const T>. */
-    typedef device_reference<const T>         const_reference;
-
-    /*! Type of allocation size, \c std::size_t. */
-    typedef std::size_t                       size_type;
-
-    /*! Type of allocation difference, \c pointer::difference_type. */
-    typedef typename pointer::difference_type difference_type;
-
-    /*! The \p rebind metafunction provides the type of a \p device_new_allocator
-     *  instantiated with another type.
-     *
-     *  \tparam U The other type to use for instantiation.
-     */
-    template<typename U>
-      struct rebind
-    {
-      /*! The typedef \p other gives the type of the rebound \p device_new_allocator.
-       */
-      typedef device_new_allocator<U> other;
-    }; // end rebind
-
-    /*! No-argument constructor has no effect. */
-    __host__ __device__
-    inline device_new_allocator() {}
-
-    /*! No-argument destructor has no effect. */
-    __host__ __device__
-    inline ~device_new_allocator() {}
-
-    /*! Copy constructor has no effect. */
-    __host__ __device__
-    inline device_new_allocator(device_new_allocator const&) {}
-
-    /*! Constructor from other \p device_malloc_allocator has no effect. */
-    template<typename U>
-    __host__ __device__
-    inline device_new_allocator(device_new_allocator<U> const&) {}
-
-    /*! Returns the address of an allocated object.
-     *  \return <tt>&r</tt>.
-     */
-    __host__ __device__
-    inline pointer address(reference r) { return &r; }
-    
-    /*! Returns the address an allocated object.
-     *  \return <tt>&r</tt>.
-     */
-    __host__ __device__
-    inline const_pointer address(const_reference r) { return &r; }
-
-    /*! Allocates storage for \p cnt objects.
-     *  \param cnt The number of objects to allocate.
-     *  \return A \p pointer to uninitialized storage for \p cnt objects.
-     *  \note Memory allocated by this function must be deallocated with \p deallocate.
-     */
-    __host__
-    inline pointer allocate(size_type cnt,
-                            const_pointer = const_pointer(static_cast<T*>(0)))
-    {
-      if(cnt > this->max_size())
-      {
-        throw std::bad_alloc();
-      } // end if
-
-      // use "::operator new" rather than keyword new
-      return pointer(device_new<T>(cnt));
-    } // end allocate()
-
-    /*! Deallocates storage for objects allocated with \p allocate.
-     *  \param p A \p pointer to the storage to deallocate.
-     *  \param cnt The size of the previous allocation.
-     *  \note Memory deallocated by this function must previously have been
-     *        allocated with \p allocate.
-     */
-    __host__
-    inline void deallocate(pointer p, size_type cnt)
-    {
-      // use "::operator delete" rather than keyword delete
-      device_delete(p);
-    } // end deallocate()
-
-    /*! Returns the largest value \c n for which <tt>allocate(n)</tt> might succeed.
-     *  \return The largest value \c n for which <tt>allocate(n)</tt> might succeed.
-     */
-    __host__ __device__
-    inline size_type max_size() const
-    {
-      return std::numeric_limits<size_type>::max THRUST_PREVENT_MACRO_SUBSTITUTION () / sizeof(T);
-    } // end max_size()
-
-    /*! Compares against another \p device_malloc_allocator for equality.
-     *  \return \c true
-     */
-    __host__ __device__
-    inline bool operator==(device_new_allocator const&) { return true; }
-
-    /*! Compares against another \p device_malloc_allocator for inequality.
-     *  \return \c false
-     */
-    __host__ __device__
-    inline bool operator!=(device_new_allocator const &a) {return !operator==(a); }
-}; // end device_new_allocator
-
-/*! \}
- */
-
-} // end thrust
-
diff --git a/compat/thrust/device_ptr.h b/compat/thrust/device_ptr.h
deleted file mode 100644
index dfc7e90dc3..0000000000
--- a/compat/thrust/device_ptr.h
+++ /dev/null
@@ -1,170 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file device_ptr.h
- *  \brief A pointer to a variable which resides in the "device" system's memory space
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/memory.h>
-#include <ostream>
-
-namespace thrust
-{
-
-/*! \addtogroup memory_management Memory Management
- *  \addtogroup memory_management_classes Memory Management Classes
- *  \ingroup memory_management
- *  \{
- */
-
-// forward declarations
-template<typename T> class device_reference;
-
-/*! \p device_ptr stores a pointer to an object allocated in device memory. This type
- *  provides type safety when dispatching standard algorithms on ranges resident in
- *  device memory.
- *
- *  \p device_ptr has pointer semantics: it may be dereferenced safely from the host and
- *  may be manipulated with pointer arithmetic.
- *
- *  \p device_ptr can be created with the functions device_malloc, device_new, or
- *  device_pointer_cast, or by explicitly calling its constructor with a raw pointer.
- *
- *  The raw pointer encapsulated by a \p device_ptr may be obtained by either its <tt>get</tt>
- *  method or the \p raw_pointer_cast free function.
- *
- *  \note \p device_ptr is not a smart pointer; it is the programmer's responsibility to
- *  deallocate memory pointed to by \p device_ptr.
- *
- *  \see device_malloc
- *  \see device_new
- *  \see device_pointer_cast
- *  \see raw_pointer_cast
- */
-template<typename T>
-  class device_ptr
-    : public thrust::pointer<
-               T,
-               thrust::device_system_tag,
-               thrust::device_reference<T>,
-               thrust::device_ptr<T>
-             >
-{
-  private:
-    typedef thrust::pointer<
-      T,
-      thrust::device_system_tag,
-      thrust::device_reference<T>,
-      thrust::device_ptr<T>
-    > super_t;
-
-  public:
-    /*! \p device_ptr's null constructor initializes its raw pointer to \c 0.
-     */
-    __host__ __device__
-    device_ptr() : super_t() {}
-
-    /*! \p device_ptr's copy constructor is templated to allow copying to a
-     *  <tt>device_ptr<const T></tt> from a <tt>T *</tt>.
-     *  
-     *  \param ptr A raw pointer to copy from, presumed to point to a location in
-     *         device memory.
-     */
-    template<typename OtherT>
-    __host__ __device__
-    explicit device_ptr(OtherT *ptr) : super_t(ptr) {}
-
-    /*! \p device_ptr's copy constructor allows copying from another device_ptr with related type.
-     *  \param other The \p device_ptr to copy from.
-     */
-    template<typename OtherT>
-    __host__ __device__
-    device_ptr(const device_ptr<OtherT> &other) : super_t(other) {}
-
-    /*! \p device_ptr's assignment operator allows assigning from another \p device_ptr with related type.
-     *  \param other The other \p device_ptr to copy from.
-     *  \return <tt>*this</tt>
-     */
-    template<typename OtherT>
-    __host__ __device__
-    device_ptr &operator=(const device_ptr<OtherT> &other)
-    {
-      super_t::operator=(other);
-      return *this;
-    }
-
-// declare these members for the purpose of Doxygenating them
-// they actually exist in a derived-from class
-#if 0
-    /*! This method returns this \p device_ptr's raw pointer.
-     *  \return This \p device_ptr's raw pointer.
-     */
-    __host__ __device__
-    T *get(void) const;
-#endif // end doxygen-only members
-}; // end device_ptr
-
-/*! This operator outputs the value of a \p device_ptr's raw pointer to a \p std::basic_ostream.
- *
- *  \param os The std::basic_ostream of interest.
- *  \param p The device_ptr of interest.
- *  \return os.
- */
-template<class E, class T, class Y>
-inline std::basic_ostream<E, T> &operator<<(std::basic_ostream<E, T> &os, const device_ptr<Y> &p);
-
-/*! \}
- */
-
-
-/*!
- *  \addtogroup memory_management_functions Memory Management Functions
- *  \ingroup memory_management
- *  \{
- */
-
-/*! \p device_pointer_cast creates a device_ptr from a raw pointer which is presumed to point
- *  to a location in device memory.
- *
- *  \param ptr A raw pointer, presumed to point to a location in device memory.
- *  \return A device_ptr wrapping ptr.
- */
-template<typename T>
-__host__ __device__
-inline device_ptr<T> device_pointer_cast(T *ptr);
-
-/*! This version of \p device_pointer_cast creates a copy of a device_ptr from another device_ptr.
- *  This version is included for symmetry with \p raw_pointer_cast.
- *
- *  \param ptr A device_ptr.
- *  \return A copy of \p ptr.
- */
-template<typename T>
-__host__ __device__
-inline device_ptr<T> device_pointer_cast(const device_ptr<T> &ptr);
-
-/*! \}
- */
-
-} // end thrust
-
-#include <thrust/detail/device_ptr.inl>
-#include <thrust/detail/raw_pointer_cast.h>
-
diff --git a/compat/thrust/device_reference.h b/compat/thrust/device_reference.h
deleted file mode 100644
index edae2b59af..0000000000
--- a/compat/thrust/device_reference.h
+++ /dev/null
@@ -1,969 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file device_reference.h
- *  \brief A reference to a variable which resides in the "device" system's memory space
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/device_ptr.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/detail/reference.h>
-
-namespace thrust
-{
-
-/*! \addtogroup memory_management_classes Memory Management Classes
- *  \ingroup memory_management
- *  \{
- */
-
-/*! \p device_reference acts as a reference-like object to an object stored in device memory.
- *  \p device_reference is not intended to be used directly; rather, this type
- *  is the result of deferencing a \p device_ptr. Similarly, taking the address of
- *  a \p device_reference yields a \p device_ptr.
- *  
- *  \p device_reference may often be used from host code in place of operations defined on
- *  its associated \c value_type. For example, when \p device_reference refers to an
- *  arithmetic type, arithmetic operations on it are legal:
- *
- *  \code
- *  #include <thrust/device_vector.h>
- *
- *  int main(void)
- *  {
- *    thrust::device_vector<int> vec(1, 13);
- *
- *    thrust::device_reference<int> ref_to_thirteen = vec[0];
- *
- *    int x = ref_to_thirteen + 1;
- *
- *    // x is 14
- *
- *    return 0;
- *  }
- *  \endcode
- *
- *  Similarly, we can print the value of \c ref_to_thirteen in the above code by using an
- *  \c iostream:
- *
- *  \code
- *  #include <thrust/device_vector.h>
- *  #include <iostream>
- *
- *  int main(void)
- *  {
- *    thrust::device_vector<int> vec(1, 13);
- *
- *    thrust::device_reference<int> ref_to_thirteen = vec[0];
- *
- *    std::cout << ref_to_thirteen << std::endl;
- *
- *    // 13 is printed
- *
- *    return 0;
- *  }
- *  \endcode
- *
- *  Of course, we needn't explicitly create a \p device_reference in the previous
- *  example, because one is returned by \p device_vector's bracket operator. A more natural
- *  way to print the value of a \p device_vector element might be:
- *
- *  \code
- *  #include <thrust/device_vector.h>
- *  #include <iostream>
- *
- *  int main(void)
- *  {
- *    thrust::device_vector<int> vec(1, 13);
- *
- *    std::cout << vec[0] << std::endl;
- *
- *    // 13 is printed
- *
- *    return 0;
- *  }
- *  \endcode
- *
- *  These kinds of operations should be used sparingly in performance-critical code, because
- *  they imply a potentially expensive copy between host and device space.
- *
- *  Some operations which are possible with regular objects are impossible with their
- *  corresponding \p device_reference objects due to the requirements of the C++ language. For
- *  example, because the member access operator cannot be overloaded, member variables and functions
- *  of a referent object cannot be directly accessed through its \p device_reference.
- *
- *  The following code, which generates a compiler error, illustrates:
- *
- *  \code
- *  #include <thrust/device_vector.h>
- *
- *  struct foo
- *  {
- *    int x;
- *  };
- *
- *  int main(void)
- *  {
- *    thrust::device_vector<foo> foo_vec(1);
- *
- *    thrust::device_reference<foo> foo_ref = foo_vec[0];
- *
- *    foo_ref.x = 13; // ERROR: x cannot be accessed through foo_ref
- *
- *    return 0;
- *  }
- *  \endcode
- *
- *  Instead, a host space copy must be created to access \c foo's \c x member:
- *
- *  \code
- *  #include <thrust/device_vector.h>
- *
- *  struct foo
- *  {
- *    int x;
- *  };
- *
- *  int main(void)
- *  {
- *    thrust::device_vector<foo> foo_vec(1);
- *
- *    // create a local host-side foo object
- *    foo host_foo;
- *    host_foo.x = 13;
- *
- *    thrust::device_reference<foo> foo_ref = foo_vec[0];
- *
- *    foo_ref = host_foo;
- *
- *    // foo_ref's x member is 13
- *
- *    return 0;
- *  }
- *  \endcode
- *  
- *  Another common case where a \p device_reference cannot directly be used in place of
- *  its referent object occurs when passing them as parameters to functions like \c printf
- *  which have varargs parameters. Because varargs parameters must be Plain Old Data, a
- *  \p device_reference to a POD type requires a cast when passed to \c printf:
- *
- *  \code
- *  #include <stdio.h>
- *  #include <thrust/device_vector.h>
- *
- *  int main(void)
- *  {
- *    thrust::device_vector<int> vec(1,13);
- *
- *    // vec[0] must be cast to int when passing to printf
- *    printf("%d\n", (int) vec[0]);
- *
- *    return 0;
- *  }
- *  \endcode
- *
- *  \see device_ptr
- *  \see device_vector
- */
-template<typename T>
-  class device_reference
-    : public thrust::reference<
-               T,
-               thrust::device_ptr<T>,
-               thrust::device_reference<T>
-             >
-{
-  private:
-    typedef thrust::reference<
-      T,
-      thrust::device_ptr<T>,
-      thrust::device_reference<T>
-    > super_t;
-
-  public:
-    /*! The type of the value referenced by this type of \p device_reference.
-     */
-    typedef typename super_t::value_type value_type;
-
-    /*! The type of the expression <tt>&ref</tt>, where <tt>ref</tt> is a \p device_reference.
-     */
-    typedef typename super_t::pointer    pointer;
-
-    /*! This copy constructor accepts a const reference to another
-     *  \p device_reference. After this \p device_reference is constructed,
-     *  it shall refer to the same object as \p other.
-     *  
-     *  \param other A \p device_reference to copy from.
-     *
-     *  The following code snippet demonstrates the semantics of this
-     *  copy constructor.
-     *
-     *  \code
-     *  #include <thrust/device_vector.h>
-     *  #include <assert.h>
-     *  ...
-     *  thrust::device_vector<int> v(1,0);
-     *  thrust::device_reference<int> ref = v[0];
-     *
-     *  // ref equals the object at v[0]
-     *  assert(ref == v[0]);
-     *
-     *  // the address of ref equals the address of v[0]
-     *  assert(&ref == &v[0]);
-     *
-     *  // modifying v[0] modifies ref
-     *  v[0] = 13;
-     *  assert(ref == 13);
-     *  \endcode
-     *
-     *  \note This constructor is templated primarily to allow initialization of 
-     *  <tt>device_reference<const T></tt> from <tt>device_reference<T></tt>.
-     */
-    template<typename OtherT>
-    __host__ __device__
-    device_reference(const device_reference<OtherT> &other,
-                     typename thrust::detail::enable_if_convertible<
-                       typename device_reference<OtherT>::pointer,
-                       pointer
-                     >::type * = 0)
-      : super_t(other)
-    {}
-
-    /*! This copy constructor initializes this \p device_reference
-     *  to refer to an object pointed to by the given \p device_ptr. After
-     *  this \p device_reference is constructed, it shall refer to the
-     *  object pointed to by \p ptr.
-     *
-     *  \param ptr A \p device_ptr to copy from.
-     *
-     *  The following code snippet demonstrates the semantic of this
-     *  copy constructor.
-     *
-     *  \code
-     *  #include <thrust/device_vector.h>
-     *  #include <assert.h>
-     *  ...
-     *  thrust::device_vector<int> v(1,0);
-     *  thrust::device_ptr<int> ptr = &v[0];
-     *  thrust::device_reference<int> ref(ptr);
-     *
-     *  // ref equals the object pointed to by ptr
-     *  assert(ref == *ptr);
-     *
-     *  // the address of ref equals ptr
-     *  assert(&ref == ptr);
-     *
-     *  // modifying *ptr modifies ref
-     *  *ptr = 13;
-     *  assert(ref == 13);
-     *  \endcode
-     */
-    __host__ __device__
-    explicit device_reference(const pointer &ptr)
-      : super_t(ptr)
-    {}
-
-    /*! This assignment operator assigns the value of the object referenced by
-     *  the given \p device_reference to the object referenced by this
-     *  \p device_reference.
-     *
-     *  \param other The \p device_reference to assign from.
-     *  \return <tt>*this</tt>
-     */
-    template<typename OtherT>
-    __host__ __device__
-    device_reference &operator=(const device_reference<OtherT> &other);
-
-    /*! Assignment operator assigns the value of the given value to the
-     *  value referenced by this \p device_reference.
-     *  
-     *  \param x The value to assign from.
-     *  \return <tt>*this</tt>
-     */
-    __host__ __device__
-    device_reference &operator=(const value_type &x);
-
-// declare these members for the purpose of Doxygenating them
-// they actually exist in a derived-from class
-#if 0
-    /*! Address-of operator returns a \p device_ptr pointing to the object
-     *  referenced by this \p device_reference. It does not return the
-     *  address of this \p device_reference.
-     *
-     *  \return A \p device_ptr pointing to the object this
-     *  \p device_reference references.
-     */
-    __host__ __device__
-    pointer operator&(void) const;
-
-    /*! Conversion operator converts this \p device_reference to T
-     *  by returning a copy of the object referenced by this
-     *  \p device_reference.
-     *
-     *  \return A copy of the object referenced by this \p device_reference.
-     */
-    __host__ __device__
-    operator value_type (void) const;
-
-    /*! swaps the value this \p device_reference references with another.
-     *  \p other The other \p device_reference with which to swap.
-     */
-    __host__ __device__
-    void swap(device_reference &other);
-
-    /*! Prefix increment operator increments the object referenced by this
-     *  \p device_reference.
-     *
-     *  \return <tt>*this</tt>
-     *  
-     *  The following code snippet demonstrates the semantics of
-     *  \p device_reference's prefix increment operator.
-     *
-     *  \code
-     *  #include <thrust/device_vector.h>
-     *  #include <assert.h>
-     *  ...
-     *  thrust::device_vector<int> v(1,0);
-     *  thrust::device_ptr<int> ptr = &v[0];
-     *  thrust::device_reference<int> ref(ptr);
-     *
-     *  // ref equals 0
-     *  assert(ref == 0);
-     *
-     *  // the object pointed to by ptr equals 1
-     *  assert(*ptr == 1);
-     *
-     *  // v[0] equals 1
-     *  assert(v[0] == 1);
-     *
-     *  // increment ref
-     *  ++ref;
-     *
-     *  // ref equals 1
-     *  assert(ref == 1);
-     *
-     *  // the object pointed to by ptr equals 1
-     *  assert(*ptr == 1);
-     *
-     *  // v[0] equals 1
-     *  assert(v[0] == 1);
-     *  \endcode
-     *
-     *  \note The increment executes as if it were executed on the host.
-     *  This may change in a later version.
-     */
-    device_reference &operator++(void);
-
-    /*! Postfix increment operator copies the object referenced by this
-     *  \p device_reference, increments the object referenced by this
-     *  \p device_reference, and returns the copy.
-     *
-     *  \return A copy of the object referenced by this \p device_reference
-     *          before being incremented.
-     *
-     *  The following code snippet demonstrates the semantics of
-     *  \p device_reference's postfix increment operator.
-     *
-     *  \code
-     *  #include <thrust/device_vector.h>
-     *  #include <assert.h>
-     *  ...
-     *  thrust::device_vector<int> v(1,0);
-     *  thrust::device_ptr<int> ptr = &v[0];
-     *  thrust::device_reference<int> ref(ptr);
-     *
-     *  // ref equals 0
-     *  assert(ref == 0);
-     *
-     *  // the object pointed to by ptr equals 0
-     *  assert(*ptr == 0);
-     *
-     *  // v[0] equals 0
-     *  assert(v[0] == 0);
-     *
-     *  // increment ref
-     *  int x = ref++;
-     *
-     *  // x equals 0
-     *  assert(x == 0)
-     *
-     *  // ref equals 1
-     *  assert(ref == 1);
-     *
-     *  // the object pointed to by ptr equals 1
-     *  assert(*ptr == 1);
-     *
-     *  // v[0] equals 1
-     *  assert(v[0] == 1);
-     *  \endcode
-     *
-     *  \note The increment executes as if it were executed on the host.
-     *  This may change in a later version.
-     */
-    value_type operator++(int);
-
-    /*! Addition assignment operator add-assigns the object referenced by this
-     *  \p device_reference and returns this \p device_reference.
-     *
-     *  \param rhs The right hand side of the add-assignment.
-     *  \return <tt>*this</tt>.
-     *
-     *  The following code snippet demonstrates the semantics of
-     *  \p device_reference's addition assignment operator.
-     *
-     *  \code
-     *  #include <thrust/device_vector.h>
-     *  #include <assert.h>
-     *  ...
-     *  thrust::device_vector<int> v(1,0);
-     *  thrust::device_ptr<int> ptr = &v[0];
-     *  thrust::device_reference<int> ref(ptr);
-     *
-     *  // ref equals 0
-     *  assert(ref == 0);
-     *
-     *  // the object pointed to by ptr equals 0
-     *  assert(*ptr == 0);
-     *
-     *  // v[0] equals 0
-     *  assert(v[0] == 0);
-     *
-     *  // add-assign ref
-     *  ref += 5;
-     *
-     *  // ref equals 5
-     *  assert(ref == 5);
-     *
-     *  // the object pointed to by ptr equals 5
-     *  assert(*ptr == 5);
-     *
-     *  // v[0] equals 5
-     *  assert(v[0] == 5);
-     *  \endcode
-     *
-     *  \note The add-assignment executes as as if it were executed on the host.
-     *  This may change in a later version.
-     */
-    device_reference &operator+=(const T &rhs);
-
-    /*! Prefix decrement operator decrements the object referenced by this
-     *  \p device_reference.
-     *
-     *  \return <tt>*this</tt>
-     *  
-     *  The following code snippet demonstrates the semantics of
-     *  \p device_reference's prefix decrement operator.
-     *
-     *  \code
-     *  #include <thrust/device_vector.h>
-     *  #include <assert.h>
-     *  ...
-     *  thrust::device_vector<int> v(1,0);
-     *  thrust::device_ptr<int> ptr = &v[0];
-     *  thrust::device_reference<int> ref(ptr);
-     *
-     *  // ref equals 0
-     *  assert(ref == 0);
-     *
-     *  // the object pointed to by ptr equals 0
-     *  assert(*ptr == 0);
-     *
-     *  // v[0] equals 0
-     *  assert(v[0] == 0);
-     *
-     *  // decrement ref
-     *  --ref;
-     *
-     *  // ref equals -1
-     *  assert(ref == -1);
-     *
-     *  // the object pointed to by ptr equals -1
-     *  assert(*ptr == -1);
-     *
-     *  // v[0] equals -1
-     *  assert(v[0] == -1);
-     *  \endcode
-     *
-     *  \note The decrement executes as if it were executed on the host.
-     *  This may change in a later version.
-     */
-    device_reference &operator--(void);
-
-    /*! Postfix decrement operator copies the object referenced by this
-     *  \p device_reference, decrements the object referenced by this
-     *  \p device_reference, and returns the copy.
-     *
-     *  \return A copy of the object referenced by this \p device_reference
-     *          before being decremented.
-     *
-     *  The following code snippet demonstrates the semantics of
-     *  \p device_reference's postfix decrement operator.
-     *
-     *  \code
-     *  #include <thrust/device_vector.h>
-     *  #include <assert.h>
-     *  ...
-     *  thrust::device_vector<int> v(1,0);
-     *  thrust::device_ptr<int> ptr = &v[0];
-     *  thrust::device_reference<int> ref(ptr);
-     *
-     *  // ref equals 0
-     *  assert(ref == 0);
-     *
-     *  // the object pointed to by ptr equals 0
-     *  assert(*ptr == 0);
-     *
-     *  // v[0] equals 0
-     *  assert(v[0] == 0);
-     *
-     *  // decrement ref
-     *  int x = ref--;
-     *
-     *  // x equals 0
-     *  assert(x == 0)
-     *
-     *  // ref equals -1
-     *  assert(ref == -1);
-     *
-     *  // the object pointed to by ptr equals -1
-     *  assert(*ptr == -1);
-     *
-     *  // v[0] equals -1
-     *  assert(v[0] == -1);
-     *  \endcode
-     *
-     *  \note The decrement executes as if it were executed on the host.
-     *  This may change in a later version.
-     */
-    value_type operator--(int);
-
-    /*! Subtraction assignment operator subtract-assigns the object referenced by this
-     *  \p device_reference and returns this \p device_reference.
-     *
-     *  \param rhs The right hand side of the subtraction-assignment.
-     *  \return <tt>*this</tt>.
-     *
-     *  The following code snippet demonstrates the semantics of
-     *  \p device_reference's addition assignment operator.
-     *
-     *  \code
-     *  #include <thrust/device_vector.h>
-     *  #include <assert.h>
-     *  ...
-     *  thrust::device_vector<int> v(1,0);
-     *  thrust::device_ptr<int> ptr = &v[0];
-     *  thrust::device_reference<int> ref(ptr);
-     *
-     *  // ref equals 0
-     *  assert(ref == 0);
-     *
-     *  // the object pointed to by ptr equals 0
-     *  assert(*ptr == 0);
-     *
-     *  // v[0] equals 0
-     *  assert(v[0] == 0);
-     *
-     *  // subtract-assign ref
-     *  ref -= 5;
-     *
-     *  // ref equals -5
-     *  assert(ref == -5);
-     *
-     *  // the object pointed to by ptr equals -5
-     *  assert(*ptr == -5);
-     *
-     *  // v[0] equals -5
-     *  assert(v[0] == -5);
-     *  \endcode
-     *
-     *  \note The subtract-assignment executes as as if it were executed on the host.
-     *  This may change in a later version.
-     */
-    device_reference &operator-=(const T &rhs);
-
-    /*! Multiplication assignment operator multiply-assigns the object referenced by this
-     *  \p device_reference and returns this \p device_reference.
-     *
-     *  \param rhs The right hand side of the multiply-assignment.
-     *  \return <tt>*this</tt>.
-     *
-     *  The following code snippet demonstrates the semantics of
-     *  \p device_reference's multiply assignment operator.
-     *
-     *  \code
-     *  #include <thrust/device_vector.h>
-     *  #include <assert.h>
-     *  ...
-     *  thrust::device_vector<int> v(1,1);
-     *  thrust::device_ptr<int> ptr = &v[0];
-     *  thrust::device_reference<int> ref(ptr);
-     *
-     *  // ref equals 1
-     *  assert(ref == 1);
-     *
-     *  // the object pointed to by ptr equals 1
-     *  assert(*ptr == 1);
-     *
-     *  // v[0] equals 1
-     *  assert(v[0] == 1);
-     *
-     *  // multiply-assign ref
-     *  ref *= 5;
-     *
-     *  // ref equals 5
-     *  assert(ref == 5);
-     *
-     *  // the object pointed to by ptr equals 5
-     *  assert(*ptr == 5);
-     *
-     *  // v[0] equals 5
-     *  assert(v[0] == 5);
-     *  \endcode
-     *
-     *  \note The multiply-assignment executes as as if it were executed on the host.
-     *  This may change in a later version.
-     */
-    device_reference &operator*=(const T &rhs);
-
-    /*! Division assignment operator divide-assigns the object referenced by this
-     *  \p device_reference and returns this \p device_reference.
-     *
-     *  \param rhs The right hand side of the divide-assignment.
-     *  \return <tt>*this</tt>.
-     *
-     *  The following code snippet demonstrates the semantics of
-     *  \p device_reference's divide assignment operator.
-     *
-     *  \code
-     *  #include <thrust/device_vector.h>
-     *  #include <assert.h>
-     *  ...
-     *  thrust::device_vector<int> v(1,5);
-     *  thrust::device_ptr<int> ptr = &v[0];
-     *  thrust::device_reference<int> ref(ptr);
-     *
-     *  // ref equals 5
-     *  assert(ref == 5);
-     *
-     *  // the object pointed to by ptr equals 5
-     *  assert(*ptr == 5);
-     *
-     *  // v[0] equals 5
-     *  assert(v[0] == 5);
-     *
-     *  // divide-assign ref
-     *  ref /= 5;
-     *
-     *  // ref equals 1
-     *  assert(ref == 1);
-     *
-     *  // the object pointed to by ptr equals 1
-     *  assert(*ptr == 1);
-     *
-     *  // v[0] equals 1
-     *  assert(v[0] == 1);
-     *  \endcode
-     *
-     *  \note The divide-assignment executes as as if it were executed on the host.
-     *  This may change in a later version.
-     */
-    device_reference &operator/=(const T &rhs);
-
-    /*! Modulation assignment operator modulus-assigns the object referenced by this
-     *  \p device_reference and returns this \p device_reference.
-     *
-     *  \param rhs The right hand side of the divide-assignment.
-     *  \return <tt>*this</tt>.
-     *
-     *  The following code snippet demonstrates the semantics of
-     *  \p device_reference's divide assignment operator.
-     *
-     *  \code
-     *  #include <thrust/device_vector.h>
-     *  #include <assert.h>
-     *  ...
-     *  thrust::device_vector<int> v(1,5);
-     *  thrust::device_ptr<int> ptr = &v[0];
-     *  thrust::device_reference<int> ref(ptr);
-     *
-     *  // ref equals 5
-     *  assert(ref == 5);
-     *
-     *  // the object pointed to by ptr equals 5
-     *  assert(*ptr == 5);
-     *
-     *  // v[0] equals 5
-     *  assert(v[0] == 5);
-     *
-     *  // modulus-assign ref
-     *  ref %= 5;
-     *
-     *  // ref equals 0
-     *  assert(ref == 0);
-     *
-     *  // the object pointed to by ptr equals 0
-     *  assert(*ptr == 0);
-     *
-     *  // v[0] equals 0
-     *  assert(v[0] == 0);
-     *  \endcode
-     *
-     *  \note The modulus-assignment executes as as if it were executed on the host.
-     *  This may change in a later version.
-     */
-    device_reference &operator%=(const T &rhs);
-
-    /*! Bitwise left shift assignment operator left shift-assigns the object referenced by this
-     *  \p device_reference and returns this \p device_reference.
-     *
-     *  \param rhs The right hand side of the left shift-assignment.
-     *  \return <tt>*this</tt>.
-     *
-     *  The following code snippet demonstrates the semantics of
-     *  \p device_reference's left shift assignment operator.
-     *
-     *  \code
-     *  #include <thrust/device_vector.h>
-     *  #include <assert.h>
-     *  ...
-     *  thrust::device_vector<int> v(1,1);
-     *  thrust::device_ptr<int> ptr = &v[0];
-     *  thrust::device_reference<int> ref(ptr);
-     *
-     *  // ref equals 1
-     *  assert(ref == 1);
-     *
-     *  // the object pointed to by ptr equals 1
-     *  assert(*ptr == 1);
-     *
-     *  // v[0] equals 1
-     *  assert(v[0] == 1);
-     *
-     *  // left shift-assign ref
-     *  ref <<= 1;
-     *
-     *  // ref equals 2
-     *  assert(ref == 2);
-     *
-     *  // the object pointed to by ptr equals 2
-     *  assert(*ptr == 2);
-     *
-     *  // v[0] equals 2
-     *  assert(v[0] == 2);
-     *  \endcode
-     *
-     *  \note The left shift-assignment executes as as if it were executed on the host.
-     *  This may change in a later version.
-     */
-    device_reference &operator<<=(const T &rhs);
-
-    /*! Bitwise right shift assignment operator right shift-assigns the object referenced by this
-     *  \p device_reference and returns this \p device_reference.
-     *
-     *  \param rhs The right hand side of the right shift-assignment.
-     *  \return <tt>*this</tt>.
-     *
-     *  The following code snippet demonstrates the semantics of
-     *  \p device_reference's right shift assignment operator.
-     *
-     *  \code
-     *  #include <thrust/device_vector.h>
-     *  #include <assert.h>
-     *  ...
-     *  thrust::device_vector<int> v(1,2);
-     *  thrust::device_ptr<int> ptr = &v[0];
-     *  thrust::device_reference<int> ref(ptr);
-     *
-     *  // ref equals 2
-     *  assert(ref == 2);
-     *
-     *  // the object pointed to by ptr equals 2
-     *  assert(*ptr == 2);
-     *
-     *  // v[0] equals 2
-     *  assert(v[0] == 2);
-     *
-     *  // right shift-assign ref
-     *  ref >>= 1;
-     *
-     *  // ref equals 1
-     *  assert(ref == 1);
-     *
-     *  // the object pointed to by ptr equals 1
-     *  assert(*ptr == 1);
-     *
-     *  // v[0] equals 1
-     *  assert(v[0] == 1);
-     *  \endcode
-     *
-     *  \note The right shift-assignment executes as as if it were executed on the host.
-     *  This may change in a later version.
-     */
-    device_reference &operator>>=(const T &rhs);
-
-    /*! Bitwise AND assignment operator AND-assigns the object referenced by this
-     *  \p device_reference and returns this \p device_reference.
-     *
-     *  \param rhs The right hand side of the AND-assignment.
-     *  \return <tt>*this</tt>.
-     *
-     *  The following code snippet demonstrates the semantics of
-     *  \p device_reference's AND assignment operator.
-     *
-     *  \code
-     *  #include <thrust/device_vector.h>
-     *  #include <assert.h>
-     *  ...
-     *  thrust::device_vector<int> v(1,1);
-     *  thrust::device_ptr<int> ptr = &v[0];
-     *  thrust::device_reference<int> ref(ptr);
-     *
-     *  // ref equals 1
-     *  assert(ref == 1);
-     *
-     *  // the object pointed to by ptr equals 1
-     *  assert(*ptr == 1);
-     *
-     *  // v[0] equals 1
-     *  assert(v[0] == 1);
-     *
-     *  // right AND-assign ref
-     *  ref &= 0;
-     *
-     *  // ref equals 0
-     *  assert(ref == 0);
-     *
-     *  // the object pointed to by ptr equals 0
-     *  assert(*ptr == 0);
-     *
-     *  // v[0] equals 0
-     *  assert(v[0] == 0);
-     *  \endcode
-     *
-     *  \note The AND-assignment executes as as if it were executed on the host.
-     *  This may change in a later version.
-     */
-    device_reference &operator&=(const T &rhs);
-
-    /*! Bitwise OR assignment operator OR-assigns the object referenced by this
-     *  \p device_reference and returns this \p device_reference.
-     *
-     *  \param rhs The right hand side of the OR-assignment.
-     *  \return <tt>*this</tt>.
-     *
-     *  The following code snippet demonstrates the semantics of
-     *  \p device_reference's OR assignment operator.
-     *
-     *  \code
-     *  #include <thrust/device_vector.h>
-     *  #include <assert.h>
-     *  ...
-     *  thrust::device_vector<int> v(1,0);
-     *  thrust::device_ptr<int> ptr = &v[0];
-     *  thrust::device_reference<int> ref(ptr);
-     *
-     *  // ref equals 0
-     *  assert(ref == 0);
-     *
-     *  // the object pointed to by ptr equals 0
-     *  assert(*ptr == 0);
-     *
-     *  // v[0] equals 0
-     *  assert(v[0] == 0);
-     *
-     *  // right OR-assign ref
-     *  ref |= 1;
-     *
-     *  // ref equals 1
-     *  assert(ref == 1);
-     *
-     *  // the object pointed to by ptr equals 1
-     *  assert(*ptr == 1);
-     *
-     *  // v[0] equals 1
-     *  assert(v[0] == 1);
-     *  \endcode
-     *
-     *  \note The OR-assignment executes as as if it were executed on the host.
-     *  This may change in a later version.
-     */
-    device_reference &operator|=(const T &rhs);
-
-    /*! Bitwise XOR assignment operator XOR-assigns the object referenced by this
-     *  \p device_reference and returns this \p device_reference.
-     *
-     *  \param rhs The right hand side of the XOR-assignment.
-     *  \return <tt>*this</tt>.
-     *
-     *  The following code snippet demonstrates the semantics of
-     *  \p device_reference's XOR assignment operator.
-     *
-     *  \code
-     *  #include <thrust/device_vector.h>
-     *  #include <assert.h>
-     *  ...
-     *  thrust::device_vector<int> v(1,1);
-     *  thrust::device_ptr<int> ptr = &v[0];
-     *  thrust::device_reference<int> ref(ptr);
-     *
-     *  // ref equals 1
-     *  assert(ref == 1);
-     *
-     *  // the object pointed to by ptr equals 1
-     *  assert(*ptr == 1);
-     *
-     *  // v[0] equals 1
-     *  assert(v[0] == 1);
-     *
-     *  // right XOR-assign ref
-     *  ref ^= 1;
-     *
-     *  // ref equals 0
-     *  assert(ref == 0);
-     *
-     *  // the object pointed to by ptr equals 0
-     *  assert(*ptr == 0);
-     *
-     *  // v[0] equals 0
-     *  assert(v[0] == 0);
-     *  \endcode
-     *
-     *  \note The XOR-assignment executes as as if it were executed on the host.
-     *  This may change in a later version.
-     */
-    device_reference &operator^=(const T &rhs);
-#endif // end doxygen-only members
-}; // end device_reference
-
-/*! swaps the value of one \p device_reference with another.
- *  \p x The first \p device_reference of interest.
- *  \p y The second \p device_reference of interest.
- */
-template<typename T>
-__host__ __device__
-void swap(device_reference<T> &x, device_reference<T> &y);
-
-/*! \}
- */
-
-} // end thrust
-
-#include <thrust/detail/device_reference.inl>
-
diff --git a/compat/thrust/device_vector.h b/compat/thrust/device_vector.h
deleted file mode 100644
index 8c9d0051a2..0000000000
--- a/compat/thrust/device_vector.h
+++ /dev/null
@@ -1,418 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file device_vector.h
- *  \brief A dynamically-sizable array of elements which reside in the "device" memory space
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/device_malloc_allocator.h>
-#include <thrust/detail/vector_base.h>
-#include <vector>
-
-namespace thrust
-{
-
-// forward declaration of host_vector
-template<typename T, typename Alloc> class host_vector;
-
-/*! \addtogroup container_classes Container Classes
- *  \addtogroup device_containers Device Containers
- *  \ingroup container_classes
- *  \{
- */
-
-/*! A \p device_vector is a container that supports random access to elements,
- *  constant time removal of elements at the end, and linear time insertion
- *  and removal of elements at the beginning or in the middle. The number of
- *  elements in a \p device_vector may vary dynamically; memory management is
- *  automatic. The memory associated with a \p device_vector resides in the memory
- *  space of a parallel device.
- *
- *  \see http://www.sgi.com/tech/stl/Vector.html
- *  \see host_vector
- */
-template<typename T, typename Alloc = thrust::device_malloc_allocator<T> >
-  class device_vector
-    : public detail::vector_base<T,Alloc>
-{
-  private:
-    typedef detail::vector_base<T,Alloc> Parent;
-
-  public:
-    /*! \cond */
-    typedef typename Parent::size_type  size_type;
-    typedef typename Parent::value_type value_type;
-    /*! \endcond */
-
-    /*! This constructor creates an empty \p device_vector.
-     */
-    __host__
-    device_vector(void)
-      :Parent() {}
-
-    /*! This constructor creates a \p device_vector with the given
-     *  size.
-     *  \param n The number of elements to initially craete.
-     */
-    __host__
-    explicit device_vector(size_type n)
-      :Parent(n) {}
-
-    /*! This constructor creates a \p device_vector with copies
-     *  of an exemplar element.
-     *  \param n The number of elements to initially create.
-     *  \param value An element to copy.
-     */
-    __host__
-    explicit device_vector(size_type n, const value_type &value)
-      :Parent(n,value) {}
-
-    /*! Copy constructor copies from an exemplar \p device_vector.
-     *  \param v The \p device_vector to copy.
-     */
-    __host__
-    device_vector(const device_vector &v)
-      :Parent(v) {}
-
-    /*! Copy constructor copies from an exemplar \p device_vector with different type.
-     *  \param v The \p device_vector to copy.
-     */
-    template<typename OtherT, typename OtherAlloc>
-    __device__
-    device_vector(const device_vector<OtherT,OtherAlloc> &v)
-      :Parent(v) {}
-
-    /*! Assign operator copies from an exemplar \p device_vector with different type.
-     *  \param v The \p device_vector to copy.
-     */
-    template<typename OtherT, typename OtherAlloc>
-    __device__
-    device_vector &operator=(const device_vector<OtherT,OtherAlloc> &v)
-    { Parent::operator=(v); return *this; }
-
-    /*! Copy constructor copies from an exemplar \c std::vector.
-     *  \param v The <tt>std::vector</tt> to copy.
-     */
-    template<typename OtherT, typename OtherAlloc>
-    __host__
-    device_vector(const std::vector<OtherT,OtherAlloc> &v)
-      :Parent(v) {}
-
-    /*! Assign operator copies from an exemplar <tt>std::vector</tt>.
-     *  \param v The <tt>std::vector</tt> to copy.
-     */
-    template<typename OtherT, typename OtherAlloc>
-    __host__
-    device_vector &operator=(const std::vector<OtherT,OtherAlloc> &v)
-    { Parent::operator=(v); return *this;}
-
-    /*! Copy constructor copies from an exemplar \p host_vector with possibly different type.
-     *  \param v The \p host_vector to copy.
-     */
-    template<typename OtherT, typename OtherAlloc>
-    __host__
-    device_vector(const host_vector<OtherT,OtherAlloc> &v);
-
-    /*! Assign operator copies from an examplar \p host_vector.
-     *  \param v The \p host_vector to copy.
-     */
-    template<typename OtherT, typename OtherAlloc>
-    __host__
-    device_vector &operator=(const host_vector<OtherT,OtherAlloc> &v)
-    { Parent::operator=(v); return *this; }
-
-    /*! This constructor builds a \p device_vector from a range.
-     *  \param first The beginning of the range.
-     *  \param last The end of the range.
-     */
-    template<typename InputIterator>
-    __host__
-    device_vector(InputIterator first, InputIterator last)
-      :Parent(first,last) {}
-
-// declare these members for the purpose of Doxygenating them
-// they actually exist in a derived-from class
-#if 0
-    /*! \brief Resizes this vector to the specified number of elements.
-     *  \param new_size Number of elements this vector should contain.
-     *  \param x Data with which new elements should be populated.
-     *  \throw std::length_error If n exceeds max_size().
-     *
-     *  This method will resize this vector to the specified number of
-     *  elements.  If the number is smaller than this vector's current
-     *  size this vector is truncated, otherwise this vector is
-     *  extended and new elements are populated with given data.
-     */
-    void resize(size_type new_size, const value_type &x = value_type());
-
-    /*! Returns the number of elements in this vector.
-     */
-    size_type size(void) const;
-
-    /*! Returns the size() of the largest possible vector.
-     *  \return The largest possible return value of size().
-     */
-    size_type max_size(void) const;
-
-    /*! \brief If n is less than or equal to capacity(), this call has no effect.
-     *         Otherwise, this method is a request for allocation of additional memory. If
-     *         the request is successful, then capacity() is greater than or equal to
-     *         n; otherwise, capacity() is unchanged. In either case, size() is unchanged.
-     *  \throw std::length_error If n exceeds max_size().
-     */
-    void reserve(size_type n);
-
-    /*! Returns the number of elements which have been reserved in this
-     *  vector.
-     */
-    size_type capacity(void) const;
-
-    /*! This method shrinks the capacity of this vector to exactly
-     *  fit its elements.
-     */
-    void shrink_to_fit(void);
-
-    /*! \brief Subscript access to the data contained in this vector_dev.
-     *  \param n The index of the element for which data should be accessed.
-     *  \return Read/write reference to data.
-     *
-     *  This operator allows for easy, array-style, data access.
-     *  Note that data access with this operator is unchecked and
-     *  out_of_range lookups are not defined.
-     */
-    reference operator[](size_type n);
-
-    /*! \brief Subscript read access to the data contained in this vector_dev.
-     *  \param n The index of the element for which data should be accessed.
-     *  \return Read reference to data.
-     *
-     *  This operator allows for easy, array-style, data access.
-     *  Note that data access with this operator is unchecked and
-     *  out_of_range lookups are not defined.
-     */
-    const_reference operator[](size_type n) const;
-
-    /*! This method returns an iterator pointing to the beginning of
-     *  this vector.
-     *  \return mStart
-     */
-    iterator begin(void);
-
-    /*! This method returns a const_iterator pointing to the beginning
-     *  of this vector.
-     *  \return mStart
-     */
-    const_iterator begin(void) const;
-
-    /*! This method returns a const_iterator pointing to the beginning
-     *  of this vector.
-     *  \return mStart
-     */
-    const_iterator cbegin(void) const;
-
-    /*! This method returns a reverse_iterator pointing to the beginning of
-     *  this vector's reversed sequence.
-     *  \return A reverse_iterator pointing to the beginning of this
-     *          vector's reversed sequence.
-     */
-    reverse_iterator rbegin(void);
-
-    /*! This method returns a const_reverse_iterator pointing to the beginning of
-     *  this vector's reversed sequence.
-     *  \return A const_reverse_iterator pointing to the beginning of this
-     *          vector's reversed sequence.
-     */
-    const_reverse_iterator rbegin(void) const;
-
-    /*! This method returns a const_reverse_iterator pointing to the beginning of
-     *  this vector's reversed sequence.
-     *  \return A const_reverse_iterator pointing to the beginning of this
-     *          vector's reversed sequence.
-     */
-    const_reverse_iterator crbegin(void) const;
-
-    /*! This method returns an iterator pointing to one element past the
-     *  last of this vector.
-     *  \return begin() + size().
-     */
-    iterator end(void);
-
-    /*! This method returns a const_iterator pointing to one element past the
-     *  last of this vector.
-     *  \return begin() + size().
-     */
-    const_iterator end(void) const;
-
-    /*! This method returns a const_iterator pointing to one element past the
-     *  last of this vector.
-     *  \return begin() + size().
-     */
-    const_iterator cend(void) const;
-
-    /*! This method returns a reverse_iterator pointing to one element past the
-     *  last of this vector's reversed sequence.
-     *  \return rbegin() + size().
-     */
-    reverse_iterator rend(void);
-
-    /*! This method returns a const_reverse_iterator pointing to one element past the
-     *  last of this vector's reversed sequence.
-     *  \return rbegin() + size().
-     */
-    const_reverse_iterator rend(void) const;
-
-    /*! This method returns a const_reverse_iterator pointing to one element past the
-     *  last of this vector's reversed sequence.
-     *  \return rbegin() + size().
-     */
-    const_reverse_iterator crend(void) const;
-
-    /*! This method returns a const_reference referring to the first element of this
-     *  vector.
-     *  \return The first element of this vector.
-     */
-    const_reference front(void) const;
-
-    /*! This method returns a reference pointing to the first element of this
-     *  vector.
-     *  \return The first element of this vector.
-     */
-    reference front(void);
-
-    /*! This method returns a const reference pointing to the last element of
-     *  this vector.
-     *  \return The last element of this vector.
-     */
-    const_reference back(void) const;
-
-    /*! This method returns a reference referring to the last element of
-     *  this vector_dev.
-     *  \return The last element of this vector.
-     */
-    reference back(void);
-
-    /*! This method returns a pointer to this vector's first element.
-     *  \return A pointer to the first element of this vector.
-     */
-    pointer data(void);
-
-    /*! This method returns a const_pointer to this vector's first element.
-     *  \return a const_pointer to the first element of this vector.
-     */
-    const_pointer data(void) const;
-
-    /*! This method resizes this vector to 0.
-     */
-    void clear(void);
-
-    /*! This method returns true iff size() == 0.
-     *  \return true if size() == 0; false, otherwise.
-     */
-    bool empty(void) const;
-
-    /*! This method appends the given element to the end of this vector.
-     *  \param x The element to append.
-     */
-    void push_back(const value_type &x);
-
-    /*! This method erases the last element of this vector, invalidating
-     *  all iterators and references to it.
-     */
-    void pop_back(void);
-
-    /*! This method swaps the contents of this vector_base with another vector.
-     *  \param v The vector with which to swap.
-     */
-    void swap(device_vector &v);
-
-    /*! This method removes the element at position pos.
-     *  \param pos The position of the element of interest.
-     *  \return An iterator pointing to the new location of the element that followed the element
-     *          at position pos.
-     */
-    iterator erase(iterator pos);
-
-    /*! This method removes the range of elements [first,last) from this vector.
-     *  \param first The beginning of the range of elements to remove.
-     *  \param last The end of the range of elements to remove.
-     *  \return An iterator pointing to the new location of the element that followed the last
-     *          element in the sequence [first,last).
-     */
-    iterator erase(iterator first, iterator last);
-
-    /*! This method inserts a single copy of a given exemplar value at the
-     *  specified position in this vector.
-     *  \param position The insertion position.
-     *  \param x The exemplar element to copy & insert.
-     *  \return An iterator pointing to the newly inserted element.
-     */
-    iterator insert(iterator position, const T &x); 
-
-    /*! This method inserts a copy of an exemplar value to a range at the
-     *  specified position in this vector.
-     *  \param position The insertion position
-     *  \param n The number of insertions to perform.
-     *  \param x The value to replicate and insert.
-     */
-    void insert(iterator position, size_type n, const T &x);
-
-    /*! This method inserts a copy of an input range at the specified position
-     *  in this vector.
-     *  \param position The insertion position.
-     *  \param first The beginning of the range to copy.
-     *  \param last  The end of the range to copy.
-     *
-     *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html>Input Iterator</a>,
-     *                        and \p InputIterator's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/Assignable.html">Assignable</a>.
-     */
-    template<typename InputIterator>
-    void insert(iterator position, InputIterator first, InputIterator last);
-
-    /*! This version of \p assign replicates a given exemplar
-     *  \p n times into this vector.
-     *  \param n The number of times to copy \p x.
-     *  \param x The exemplar element to replicate.
-     */
-    void assign(size_type n, const T &x);
-
-    /*! This version of \p assign makes this vector a copy of a given input range.
-     *  \param first The beginning of the range to copy.
-     *  \param last  The end of the range to copy.
-     *
-     *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator">Input Iterator</a>.
-     */
-    template<typename InputIterator>
-    void assign(InputIterator first, InputIterator last);
-
-    /*! This method returns a copy of this vector's allocator.
-     *  \return A copy of the alloctor used by this vector.
-     */
-    allocator_type get_allocator(void) const;
-#endif // end doxygen-only members
-}; // end device_vector
-
-/*! \}
- */
-
-} // end thrust
-
-#include <thrust/detail/device_vector.inl>
-
-
diff --git a/compat/thrust/distance.h b/compat/thrust/distance.h
deleted file mode 100644
index 67b41946bf..0000000000
--- a/compat/thrust/distance.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file distance.h
- *  \brief Computes the size of a range
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/iterator/iterator_traits.h>
-
-namespace thrust
-{
-
-
-/*! \addtogroup iterators
- *  \{
- */
-
-/*! \p distance finds the distance between \p first and \p last, i.e. the
- *  number of times that \p first must be incremented until it is equal to
- *  \p last.
- *
- *  \param first The beginning of an input range of interest.
- *  \param last The end of an input range of interest.
- *  \return The distance between the beginning and end of the input range.
- *
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>.
- *
- *  \pre If \c InputIterator meets the requirements of random access iterator, \p last shall be reachable from \p first or
- *       \p first shall be reachable from \p last; otherwise, \p last shall be reachable from \p first.
- *
- *  The following code snippet demonstrates how to use \p distance to compute
- *  the distance to one iterator from another.
- *
- *  \code
- *  #include <thrust/distance.h>
- *  #include <thrust/device_vector.h>
- *  ...
- *  thrust::device_vector<int> vec(13);
- *  thrust::device_vector<int>::iterator iter1 = vec.begin();
- *  thrust::device_vector<int>::iterator iter2 = iter1 + 7;
- *
- *  int d = thrust::distance(iter1, iter2);
- *
- *  // d is 7
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/distance.html
- */
-template<typename InputIterator>
-  inline typename thrust::iterator_traits<InputIterator>::difference_type
-    distance(InputIterator first, InputIterator last);
-
-/*! \} // end iterators
- */
-
-} // end thrust
-
-#include <thrust/detail/distance.inl>
-
diff --git a/compat/thrust/equal.h b/compat/thrust/equal.h
deleted file mode 100644
index e96946fcf7..0000000000
--- a/compat/thrust/equal.h
+++ /dev/null
@@ -1,236 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file equal.h
- *  \brief Equality between ranges
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/execution_policy.h>
-
-namespace thrust
-{
-
-
-/*! \addtogroup reductions
- *  \{
- *  \addtogroup comparisons
- *  \ingroup reductions
- *  \{
- */
-
-
-/*! \p equal returns \c true if the two ranges <tt>[first1, last1)</tt>
- *  and <tt>[first2, first2 + (last1 - first1))</tt> are identical when
- *  compared element-by-element, and otherwise returns \c false.
- *
- *  This version of \p equal returns \c true if and only if for every
- *  iterator \c i in <tt>[first1, last1)</tt>, <tt>*i == *(first2 + (i - first1))</tt>.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first1 The beginning of the first sequence.
- *  \param last1  The end of the first sequence.
- *  \param first2 The beginning of the second sequence.
- *  \return \c true, if the sequences are equal; \c false, otherwise.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator1's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/EqualityComparable.html">Equality Comparable</a>,
- *          and \p InputIterator1's \c value_type can be compared for equality with \c InputIterator2's \c value_type.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator2's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/EqualityComparable.html">Equality Comparable</a>,
- *          and \p InputIterator2's \c value_type can be compared for equality with \c InputIterator1's \c value_type.
- *
- *  The following code snippet demonstrates how to use \p equal to test
- *  two ranges for equality using the \p thrust::host execution policy:
- *
- *  \code
- *  #include <thrust/equal.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  int A1[7] = {3, 1, 4, 1, 5, 9, 3};
- *  int A2[7] = {3, 1, 4, 2, 8, 5, 7};
- *  ...
- *  bool result = thrust::equal(thrust::host, A1, A1 + 7, A2);
- *
- *  // result == false
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/equal.html
- */
-template<typename DerivedPolicy, typename InputIterator1, typename InputIterator2>
-bool equal(const thrust::detail::execution_policy_base<DerivedPolicy> &exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2);
-
-
-/*! \p equal returns \c true if the two ranges <tt>[first1, last1)</tt>
- *  and <tt>[first2, first2 + (last1 - first1))</tt> are identical when
- *  compared element-by-element, and otherwise returns \c false.
- *
- *  This version of \p equal returns \c true if and only if for every
- *  iterator \c i in <tt>[first1, last1)</tt>, <tt>*i == *(first2 + (i - first1))</tt>.
- *
- *  \param first1 The beginning of the first sequence.
- *  \param last1  The end of the first sequence.
- *  \param first2 The beginning of the second sequence.
- *  \return \c true, if the sequences are equal; \c false, otherwise.
- *
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator1's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/EqualityComparable.html">Equality Comparable</a>,
- *          and \p InputIterator1's \c value_type can be compared for equality with \c InputIterator2's \c value_type.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator2's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/EqualityComparable.html">Equality Comparable</a>,
- *          and \p InputIterator2's \c value_type can be compared for equality with \c InputIterator1's \c value_type.
- *
- *  The following code snippet demonstrates how to use \p equal to test
- *  two ranges for equality.
- *
- *  \code
- *  #include <thrust/equal.h>
- *  ...
- *  int A1[7] = {3, 1, 4, 1, 5, 9, 3};
- *  int A2[7] = {3, 1, 4, 2, 8, 5, 7};
- *  ...
- *  bool result = thrust::equal(A1, A1 + 7, A2);
- *
- *  // result == false
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/equal.html
- */
-template <typename InputIterator1, typename InputIterator2>
-bool equal(InputIterator1 first1, InputIterator1 last1,
-           InputIterator2 first2);
-
-
-/*! \p equal returns \c true if the two ranges <tt>[first1, last1)</tt>
- *  and <tt>[first2, first2 + (last1 - first1))</tt> are identical when
- *  compared element-by-element, and otherwise returns \c false.
- *
- *  This version of \p equal returns \c true if and only if for every
- *  iterator \c i in <tt>[first1, last1)</tt>,
- *  <tt>binary_pred(*i, *(first2 + (i - first1)))</tt> is \c true.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first1 The beginning of the first sequence.
- *  \param last1  The end of the first sequence.
- *  \param first2 The beginning of the second sequence.
- *  \param binary_pred Binary predicate used to test element equality.
- *  \return \c true, if the sequences are equal; \c false, otherwise.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator1's \c value_type is convertible to \p BinaryPredicate's \c first_argument_type.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator2's \c value_type is convertible to \p BinaryPredicate's \c second_argument_type.
- *  \tparam BinaryPredicate is a model of <a href="http://www.sgi.com/tech/stl/BinaryPredicate.html">Binary Predicate</a>.
- *
- *  The following code snippet demonstrates how to use \p equal to compare the
- *  elements in two ranges modulo 2 using the \p thrust::host execution policy.
- *
- *  \code
- *  #include <thrust/equal.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  
- *  struct compare_modulo_two
- *  {
- *    __host__ __device__
- *    bool operator()(int x, int y)
- *    {
- *      return (x % 2) == (y % 2);
- *    }
- *  };
- *  ...
- *  int x[6] = {0, 2, 4, 6, 8, 10};
- *  int y[6] = {1, 3, 5, 7, 9, 11};
- *
- *  bool result = thrust::equal(thrust::host, x, x + 6, y, compare_modulo_two());
- *
- *  // result is false
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/equal.html
- */
-template<typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename BinaryPredicate>
-bool equal(const thrust::detail::execution_policy_base<DerivedPolicy> &exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, BinaryPredicate binary_pred);
-
-
-/*! \p equal returns \c true if the two ranges <tt>[first1, last1)</tt>
- *  and <tt>[first2, first2 + (last1 - first1))</tt> are identical when
- *  compared element-by-element, and otherwise returns \c false.
- *
- *  This version of \p equal returns \c true if and only if for every
- *  iterator \c i in <tt>[first1, last1)</tt>,
- *  <tt>binary_pred(*i, *(first2 + (i - first1)))</tt> is \c true.
- *
- *  \param first1 The beginning of the first sequence.
- *  \param last1  The end of the first sequence.
- *  \param first2 The beginning of the second sequence.
- *  \param binary_pred Binary predicate used to test element equality.
- *  \return \c true, if the sequences are equal; \c false, otherwise.
- *
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator1's \c value_type is convertible to \p BinaryPredicate's \c first_argument_type.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator2's \c value_type is convertible to \p BinaryPredicate's \c second_argument_type.
- *  \tparam BinaryPredicate is a model of <a href="http://www.sgi.com/tech/stl/BinaryPredicate.html">Binary Predicate</a>.
- *
- *  The following code snippet demonstrates how to use \p equal to compare the
- *  elements in two ranges modulo 2.
- *
- *  \code
- *  #include <thrust/equal.h>
- *  
- *  struct compare_modulo_two
- *  {
- *    __host__ __device__
- *    bool operator()(int x, int y)
- *    {
- *      return (x % 2) == (y % 2);
- *    }
- *  };
- *  ...
- *  int x[6] = {0, 2, 4, 6, 8, 10};
- *  int y[6] = {1, 3, 5, 7, 9, 11};
- *
- *  bool result = thrust::equal(x, x + 6, y, compare_modulo_two());
- *
- *  // result is false
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/equal.html
- */
-template <typename InputIterator1, typename InputIterator2, 
-          typename BinaryPredicate>
-bool equal(InputIterator1 first1, InputIterator1 last1,
-           InputIterator2 first2, BinaryPredicate binary_pred);
-
-
-/*! \} // end comparisons
- *  \} // end reductions
- */
-
-} // end namespace thrust
-
-#include <thrust/detail/equal.inl>
-
diff --git a/compat/thrust/execution_policy.h b/compat/thrust/execution_policy.h
deleted file mode 100644
index a5b61e95b4..0000000000
--- a/compat/thrust/execution_policy.h
+++ /dev/null
@@ -1,351 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-/*! \file thrust/execution_policy.h
- *  \brief Thrust execution policies.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// get the definition of thrust::execution_policy
-#include <thrust/detail/execution_policy.h>
-
-// #include the host system's execution_policy header
-#define __THRUST_HOST_SYSTEM_EXECUTION_POLICY_HEADER <__THRUST_HOST_SYSTEM_ROOT/execution_policy.h>
-#include __THRUST_HOST_SYSTEM_EXECUTION_POLICY_HEADER
-#undef __THRUST_HOST_SYSTEM_EXECUTION_POLICY_HEADER
-
-// #include the device system's execution_policy.h header
-#define __THRUST_DEVICE_SYSTEM_EXECUTION_POLICY_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/execution_policy.h>
-#include __THRUST_DEVICE_SYSTEM_EXECUTION_POLICY_HEADER
-#undef __THRUST_DEVICE_SYSTEM_EXECUTION_POLICY_HEADER
-
-namespace thrust
-{
-
-
-/*! \cond
- */
-
-
-namespace detail
-{
-
-
-typedef thrust::system::__THRUST_HOST_SYSTEM_NAMESPACE::detail::par_t host_t;
-
-
-typedef thrust::system::__THRUST_DEVICE_SYSTEM_NAMESPACE::detail::par_t device_t;
-
-
-} // end detail
-
-
-/*! \endcond
- */
-
-
-/*! \addtogroup execution_policies Parallel Execution Policies
- *  \{
- */
-
-
-// define execution_policy for the purpose of Doxygenating it
-// it is actually defined elsewhere
-#if 0
-/*! \p execution_policy is the base class for all Thrust parallel execution policies
- *  like \p thrust::host, \p thrust::device, and each backend system's tag type.
- *
- *  Custom user-defined backends should derive a policy from this type in order to
- *  interoperate with Thrust algorithm dispatch.
- *
- *  The following code snippet demonstrates how to derive a standalone custom execution policy
- *  from \p thrust::execution_policy to implement a backend which only implements \p for_each:
- *
- *  \code
- *  #include <thrust/execution_policy.h>
- *  #include <iostream>
- *
- *  // define a type derived from thrust::execution_policy to distinguish our custom execution policy:
- *  struct my_policy : thrust::execution_policy<my_policy> {};
- *
- *  // overload for_each on my_policy
- *  template<typename Iterator, typename Function>
- *  Iterator for_each(my_policy, Iterator first, Iterator last, Function f)
- *  {
- *    std::cout << "Hello, world from for_each(my_policy)!" << std::endl;
- *
- *    for(; first < last; ++first)
- *    {
- *      f(*first);
- *    }
- *
- *    return first;
- *  }
- *
- *  struct ignore_argument
- *  {
- *    void operator()(int) {}
- *  };
- *
- *  int main()
- *  {
- *    int data[4];
- *
- *    // dispatch thrust::for_each using our custom policy:
- *    my_policy exec;
- *    thrust::for_each(exec, data, data + 4, ignore_argument());
- *
- *    // can't dispatch thrust::transform because no overload exists for my_policy:
- *    //thrust::transform(exec, data, data + 4, data, thrust::identity<int>()); // error!
- *
- *    return 0;
- *  }
- *  \endcode
- *
- *  \see host_execution_policy
- *  \see device_execution_policy
- */
-template<typename DerivedPolicy>
-struct execution_policy : thrust::detail::execution_policy_base<DerivedPolicy>
-{};
-#endif
-
-
-/*! \p host_execution_policy is the base class for all Thrust parallel execution policies
- *  which are derived from Thrust's default host backend system configured with the \p THRUST_HOST_SYSTEM
- *  macro.
- *
- *  Custom user-defined backends which wish to inherit the functionality of Thrust's host backend system
- *  should derive a policy from this type in order to interoperate with Thrust algorithm dispatch.
- *
- *  The following code snippet demonstrates how to derive a standalone custom execution policy from
- *  \p thrust::host_execution_policy to implement a backend which specializes \p for_each while inheriting
- *  the behavior of every other algorithm from the host system:
- *
- *  \code
- *  #include <thrust/execution_policy.h>
- *  #include <iostream>
- *
- *  // define a type derived from thrust::host_execution_policy to distinguish our custom execution policy:
- *  struct my_policy : thrust::host_execution_policy<my_policy> {};
- *
- *  // overload for_each on my_policy
- *  template<typename Iterator, typename Function>
- *  Iterator for_each(my_policy, Iterator first, Iterator last, Function f)
- *  {
- *    std::cout << "Hello, world from for_each(my_policy)!" << std::endl;
- *
- *    for(; first < last; ++first)
- *    {
- *      f(*first);
- *    }
- *
- *    return first;
- *  }
- *
- *  struct ignore_argument
- *  {
- *    void operator()(int) {}
- *  };
- *
- *  int main()
- *  {
- *    int data[4];
- *
- *    // dispatch thrust::for_each using our custom policy:
- *    my_policy exec;
- *    thrust::for_each(exec, data, data + 4, ignore_argument());
- *
- *    // dispatch thrust::transform whose behavior our policy inherits
- *    thrust::transform(exec, data, data + 4, data, thrust::identity<int>());
- *
- *    return 0;
- *  }
- *  \endcode
- *
- *  \see execution_policy
- *  \see device_execution_policy
- */
-template<typename DerivedPolicy>
-  struct host_execution_policy
-    : thrust::system::__THRUST_HOST_SYSTEM_NAMESPACE::execution_policy<DerivedPolicy>
-{};
-
-
-/*! \p device_execution_policy is the base class for all Thrust parallel execution policies
- *  which are derived from Thrust's default device backend system configured with the \p THRUST_DEVICE_SYSTEM
- *  macro.
- *
- *  Custom user-defined backends which wish to inherit the functionality of Thrust's device backend system
- *  should derive a policy from this type in order to interoperate with Thrust algorithm dispatch.
- *
- *  The following code snippet demonstrates how to derive a standalone custom execution policy from
- *  \p thrust::device_execution_policy to implement a backend which specializes \p for_each while inheriting
- *  the behavior of every other algorithm from the device system:
- *
- *  \code
- *  #include <thrust/execution_policy.h>
- *  #include <iostream>
- *
- *  // define a type derived from thrust::device_execution_policy to distinguish our custom execution policy:
- *  struct my_policy : thrust::device_execution_policy<my_policy> {};
- *
- *  // overload for_each on my_policy
- *  template<typename Iterator, typename Function>
- *  Iterator for_each(my_policy, Iterator first, Iterator last, Function f)
- *  {
- *    std::cout << "Hello, world from for_each(my_policy)!" << std::endl;
- *
- *    for(; first < last; ++first)
- *    {
- *      f(*first);
- *    }
- *
- *    return first;
- *  }
- *
- *  struct ignore_argument
- *  {
- *    void operator()(int) {}
- *  };
- *
- *  int main()
- *  {
- *    int data[4];
- *
- *    // dispatch thrust::for_each using our custom policy:
- *    my_policy exec;
- *    thrust::for_each(exec, data, data + 4, ignore_argument());
- *
- *    // dispatch thrust::transform whose behavior our policy inherits
- *    thrust::transform(exec, data, data + 4, data, thrust::identity<int>());
- *
- *    return 0;
- *  }
- *  \endcode
- *
- *  \see execution_policy
- *  \see host_execution_policy
- */
-template<typename DerivedPolicy>
-  struct device_execution_policy
-    : thrust::system::__THRUST_DEVICE_SYSTEM_NAMESPACE::execution_policy<DerivedPolicy>
-{};
-
-
-/*! \p thrust::host is the default parallel execution policy associated with Thrust's host backend system
- *  configured by the \p THRUST_HOST_SYSTEM macro.
- *
- *  Instead of relying on implicit algorithm dispatch through iterator system tags, users may directly target
- *  algorithm dispatch at Thrust's host system by providing \p thrust::host as an algorithm parameter.
- *
- *  Explicit dispatch can be useful in avoiding the introduction of data copies into containers such as
- *  \p thrust::host_vector.
- *
- *  Note that even though \p thrust::host targets the host CPU, it is a parallel execution policy. That is,
- *  the order that an algorithm invokes functors or dereferences iterators is not defined.
- *
- *  The type of \p thrust::host is implementation-defined.
- *
- *  The following code snippet demonstrates how to use \p thrust::host to explicitly dispatch an invocation
- *  of \p thrust::for_each to the host backend system:
- *
- *  \code
- *  #include <thrust/for_each.h>
- *  #include <thrust/execution_policy.h>
- *  #include <cstdio>
- *
- *  struct printf_functor
- *  {
- *    __host__ __device__
- *    void operator()(int x)
- *    {
- *      printf("%d\n", x);
- *    }
- *  };
- *  ...
- *  int vec[3];
- *  vec[0] = 0; vec[1] = 1; vec[2] = 2;
- *
- *  thrust::for_each(thrust::host, vec, vec + 3, printf_functor());
- *
- *  // 0 1 2 is printed to standard output in some unspecified order
- *  \endcode
- *
- *  \see host_execution_policy
- *  \see thrust::device
- */
-static const detail::host_t host;
-
-
-/*! \p thrust::device is the default parallel execution policy associated with Thrust's device backend system
- *  configured by the \p THRUST_DEVICE_SYSTEM macro.
- *
- *  Instead of relying on implicit algorithm dispatch through iterator system tags, users may directly target
- *  algorithm dispatch at Thrust's device system by providing \p thrust::device as an algorithm parameter.
- *
- *  Explicit dispatch can be useful in avoiding the introduction of data copies into containers such as
- *  \p thrust::device_vector or to avoid wrapping e.g. raw pointers allocated by the CUDA API with types
- *  such as \p thrust::device_ptr.
- *
- *  The user must take care to guarantee that the iterators provided to an algorithm are compatible with
- *  the device backend system. For example, raw pointers allocated by <tt>std::malloc</tt> typically
- *  cannot be dereferenced by a GPU. For this reason, raw pointers allocated by host APIs should not be mixed
- *  with a \p thrust::device algorithm invocation when the device backend is CUDA.
- *
- *  The type of \p thrust::device is implementation-defined.
- *
- *  The following code snippet demonstrates how to use \p thrust::device to explicitly dispatch an invocation
- *  of \p thrust::for_each to the device backend system:
- *
- *  \code
- *  #include <thrust/for_each.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/execution_policy.h>
- *  #include <cstdio>
- *
- *  struct printf_functor
- *  {
- *    __host__ __device__
- *    void operator()(int x)
- *    {
- *      printf("%d\n", x);
- *    }
- *  };
- *  ...
- *  thrust::device_vector<int> d_vec(3);
- *  d_vec[0] = 0; d_vec[1] = 1; d_vec[2] = 2;
- *
- *  thrust::for_each(thrust::device, d_vec.begin(), d_vec.end(), printf_functor());
- *
- *  // 0 1 2 is printed to standard output in some unspecified order
- *  \endcode
- *
- *  \see device_execution_policy
- *  \see thrust::host
- */
-static const detail::device_t device;
-
-
-/*! \}
- */
-
-
-} // end thrust
-
diff --git a/compat/thrust/extrema.h b/compat/thrust/extrema.h
deleted file mode 100644
index 335bcd1e6b..0000000000
--- a/compat/thrust/extrema.h
+++ /dev/null
@@ -1,798 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-/*! \file extrema.h
- *  \brief Functions for computing extremal values
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/execution_policy.h>
-#include <thrust/pair.h>
-
-namespace thrust
-{
-
-
-/*! This version of \p min returns the smaller of two values, given a comparison operation.
- *  \param lhs The first value to compare.
- *  \param rhs The second value to compare.
- *  \param comp A comparison operation.
- *  \return The smaller element.
- *
- *  \tparam T is convertible to \p BinaryPredicate's first argument type and to its second argument type.
- *  \tparam BinaryPredicate is a model of <a href="http://www.sgi.com/tech/stl/BinaryPredicate.html">BinaryPredicate</a>.
- *
- *  The following code snippet demonstrates how to use \p min to compute the smaller of two
- *  key-value objects.
- *
- *  \code
- *  #include <thrust/extrema.h>
- *  ...
- *  struct key_value
- *  {
- *    int key;
- *    int value;
- *  };
- *
- *  struct compare_key_value
- *  {
- *    __host__ __device__
- *    bool operator()(key_value lhs, key_value rhs)
- *    {
- *      return lhs.key < rhs.key;
- *    }
- *  };
- *
- *  ...
- *  key_value a = {13, 0};
- *  key_value b = { 7, 1};
- *
- *  key_value smaller = thrust::min(a, b, compare_key_value());
- *
- *  // smaller is {7, 1}
- *  \endcode
- *
- *  \note Returns the first argument when the arguments are equivalent.
- *  \see max
- */
-template<typename T, typename BinaryPredicate>
-__host__ __device__
-  T min THRUST_PREVENT_MACRO_SUBSTITUTION (const T &lhs, const T &rhs, BinaryPredicate comp);
-
-
-/*! This version of \p min returns the smaller of two values.
- *  \param lhs The first value to compare.
- *  \param rhs The second value to compare.
- *  \return The smaller element.
- *
- *  \tparam T is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThan Comparable</a>.
- *
- *  The following code snippet demonstrates how to use \p min to compute the smaller of two
- *  integers.
- *
- *  \code
- *  #include <thrust/extrema.h>
- *  ...
- *  int a = 13;
- *  int b = 7;
- *
- *  int smaller = thrust::min(a, b);
- *
- *  // smaller is 7
- *  \endcode
- *
- *  \note Returns the first argument when the arguments are equivalent.
- *  \see max
- */
-template<typename T>
-__host__ __device__
-  T min THRUST_PREVENT_MACRO_SUBSTITUTION (const T &lhs, const T &rhs);
-
-
-/*! This version of \p max returns the larger of two values, given a comparison operation.
- *  \param lhs The first value to compare.
- *  \param rhs The second value to compare.
- *  \param comp A comparison operation.
- *  \return The larger element.
- *
- *  \tparam T is convertible to \p BinaryPredicate's first argument type and to its second argument type.
- *  \tparam BinaryPredicate is a model of <a href="http://www.sgi.com/tech/stl/BinaryPredicate.html">BinaryPredicate</a>.
- *
- *  The following code snippet demonstrates how to use \p max to compute the larger of two
- *  key-value objects.
- *
- *  \code
- *  #include <thrust/extrema.h>
- *  ...
- *  struct key_value
- *  {
- *    int key;
- *    int value;
- *  };
- *
- *  struct compare_key_value
- *  {
- *    __host__ __device__
- *    bool operator()(key_value lhs, key_value rhs)
- *    {
- *      return lhs.key < rhs.key;
- *    }
- *  };
- *
- *  ...
- *  key_value a = {13, 0};
- *  key_value b = { 7, 1};
- *
- *  key_value larger = thrust::max(a, b, compare_key_value());
- *
- *  // larger is {13, 0}
- *  \endcode
- *
- *  \note Returns the first argument when the arguments are equivalent.
- *  \see min
- */
-template<typename T, typename BinaryPredicate>
-__host__ __device__
-  T max THRUST_PREVENT_MACRO_SUBSTITUTION (const T &lhs, const T &rhs, BinaryPredicate comp);
-
-
-/*! This version of \p max returns the larger of two values.
- *  \param lhs The first value to compare.
- *  \param rhs The second value to compare.
- *  \return The larger element.
- *
- *  \tparam T is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThan Comparable</a>.
- *
- *  The following code snippet demonstrates how to use \p max to compute the larger of two
- *  integers.
- *
- *  \code
- *  #include <thrust/extrema.h>
- *  ...
- *  int a = 13;
- *  int b = 7;
- *
- *  int larger = thrust::max(a, b);
- *
- *  // larger is 13
- *  \endcode
- *
- *  \note Returns the first argument when the arguments are equivalent.
- *  \see min
- */
-template<typename T>
-__host__ __device__
-  T max THRUST_PREVENT_MACRO_SUBSTITUTION (const T &lhs, const T &rhs);
-
-
-/*! \addtogroup reductions
- *  \{
- *  \addtogroup extrema
- *  \ingroup reductions
- *  \{
- */
-
-/*! \p min_element finds the smallest element in the range <tt>[first, last)</tt>.
- *  It returns the first iterator \c i in <tt>[first, last)</tt>
- *  such that no other iterator in <tt>[first, last)</tt> points to a value smaller
- *  than \c *i. The return value is \p last if and only if <tt>[first, last)</tt> is an
- *  empty range.
- *
- *  The two versions of \p min_element differ in how they define whether one element is
- *  less than another. This version compares objects using \c operator<. Specifically,
- *  this version of \p min_element returns the first iterator \c i in <tt>[first, last)</tt>
- *  such that, for every iterator \c j in <tt>[first, last)</tt>, <tt>*j < *i</tt> is
- *  \c false.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the sequence.
- *  \param last  The end of the sequence.
- *  \return An iterator pointing to the smallest element of the range <tt>[first, last)</tt>,
- *          if it is not an empty range; \p last, otherwise.
- *
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator">Forward Iterator</a>,
- *          and \c ForwardIterator's \c value_type is a model of
- *          <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThan Comparable</a>.
- *
- *  \code
- *  #include <thrust/extrema.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  int data[6] = {1, 0, 2, 2, 1, 3};
- *  int *result = thrust::min_element(thrust::host, data, data + 6);
- *
- *  // result is data + 1
- *  // *result is 0
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/min_element.html 
- */
-template<typename DerivedPolicy, typename ForwardIterator>
-ForwardIterator min_element(const thrust::detail::execution_policy_base<DerivedPolicy> &exec, ForwardIterator first, ForwardIterator last);
-
-
-/*! \p min_element finds the smallest element in the range <tt>[first, last)</tt>.
- *  It returns the first iterator \c i in <tt>[first, last)</tt>
- *  such that no other iterator in <tt>[first, last)</tt> points to a value smaller
- *  than \c *i. The return value is \p last if and only if <tt>[first, last)</tt> is an
- *  empty range.
- *
- *  The two versions of \p min_element differ in how they define whether one element is
- *  less than another. This version compares objects using \c operator<. Specifically,
- *  this version of \p min_element returns the first iterator \c i in <tt>[first, last)</tt>
- *  such that, for every iterator \c j in <tt>[first, last)</tt>, <tt>*j < *i</tt> is
- *  \c false.
- *
- *  \param first The beginning of the sequence.
- *  \param last  The end of the sequence.
- *  \return An iterator pointing to the smallest element of the range <tt>[first, last)</tt>,
- *          if it is not an empty range; \p last, otherwise.
- *
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator">Forward Iterator</a>,
- *          and \c ForwardIterator's \c value_type is a model of
- *          <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThan Comparable</a>.
- *
- *  \code
- *  #include <thrust/extrema.h>
- *  ...
- *  int data[6] = {1, 0, 2, 2, 1, 3};
- *  int *result = thrust::min_element(data, data + 6);
- *
- *  // result is data + 1
- *  // *result is 0
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/min_element.html 
- */
-template <typename ForwardIterator>
-ForwardIterator min_element(ForwardIterator first, ForwardIterator last);
-
-
-/*! \p min_element finds the smallest element in the range <tt>[first, last)</tt>.
- *  It returns the first iterator \c i in <tt>[first, last)</tt>
- *  such that no other iterator in <tt>[first, last)</tt> points to a value smaller
- *  than \c *i. The return value is \p last if and only if <tt>[first, last)</tt> is an
- *  empty range.
- *
- *  The two versions of \p min_element differ in how they define whether one element is
- *  less than another. This version compares objects using a function object \p comp.
- *  Specifically, this version of \p min_element returns the first iterator \c i in <tt>[first, last)</tt>
- *  such that, for every iterator \c j in <tt>[first, last)</tt>, <tt>comp(*j, *i)</tt> is
- *  \c false.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the sequence.
- *  \param last  The end of the sequence.
- *  \param comp A binary predicate used for comparison.
- *  \return An iterator pointing to the smallest element of the range <tt>[first, last)</tt>,
- *          if it is not an empty range; \p last, otherwise.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator">Forward Iterator</a>,
- *          and \p ForwardIterator's \c value_type is convertible to both \p comp's
- *          \c first_argument_type and \c second_argument_type.
- *  \tparam BinaryPredicate is a model of <a href="http://www.sgi.com/tech/stl/BinaryPredicate">Binary Predicate</a>.
- *
- *  The following code snippet demonstrates how to use \p min_element to find the smallest element
- *  of a collection of key-value pairs using the \p thrust::host execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/extrema.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *
- *  struct key_value
- *  {
- *    int key;
- *    int value;
- *  };
- *
- *  struct compare_key_value
- *  {
- *    __host__ __device__
- *    bool operator()(key_value lhs, key_value rhs)
- *    {
- *      return lhs.key < rhs.key;
- *    }
- *  };
- *
- *  ...
- *  key_value data[4] = { {4,5}, {0,7}, {2,3}, {6,1} };
- *
- *  key_value *smallest = thrust::min_element(thrust::host, data, data + 4, compare_key_value());
- *
- *  // smallest == data + 1
- *  // *smallest == {0,7}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/min_element.html 
- */
-template<typename DerivedPolicy, typename ForwardIterator, typename BinaryPredicate>
-ForwardIterator min_element(const thrust::detail::execution_policy_base<DerivedPolicy> &exec, ForwardIterator first, ForwardIterator last, BinaryPredicate comp);
-
-
-/*! \p min_element finds the smallest element in the range <tt>[first, last)</tt>.
- *  It returns the first iterator \c i in <tt>[first, last)</tt>
- *  such that no other iterator in <tt>[first, last)</tt> points to a value smaller
- *  than \c *i. The return value is \p last if and only if <tt>[first, last)</tt> is an
- *  empty range.
- *
- *  The two versions of \p min_element differ in how they define whether one element is
- *  less than another. This version compares objects using a function object \p comp.
- *  Specifically, this version of \p min_element returns the first iterator \c i in <tt>[first, last)</tt>
- *  such that, for every iterator \c j in <tt>[first, last)</tt>, <tt>comp(*j, *i)</tt> is
- *  \c false.
- *
- *  \param first The beginning of the sequence.
- *  \param last  The end of the sequence.
- *  \param comp A binary predicate used for comparison.
- *  \return An iterator pointing to the smallest element of the range <tt>[first, last)</tt>,
- *          if it is not an empty range; \p last, otherwise.
- *
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator">Forward Iterator</a>,
- *          and \p ForwardIterator's \c value_type is convertible to both \p comp's
- *          \c first_argument_type and \c second_argument_type.
- *  \tparam BinaryPredicate is a model of <a href="http://www.sgi.com/tech/stl/BinaryPredicate">Binary Predicate</a>.
- *
- *  The following code snippet demonstrates how to use \p min_element to find the smallest element
- *  of a collection of key-value pairs.
- *
- *  \code
- *  #include <thrust/extrema.h>
- *
- *  struct key_value
- *  {
- *    int key;
- *    int value;
- *  };
- *
- *  struct compare_key_value
- *  {
- *    __host__ __device__
- *    bool operator()(key_value lhs, key_value rhs)
- *    {
- *      return lhs.key < rhs.key;
- *    }
- *  };
- *
- *  ...
- *  key_value data[4] = { {4,5}, {0,7}, {2,3}, {6,1} };
- *
- *  key_value *smallest = thrust::min_element(data, data + 4, compare_key_value());
- *
- *  // smallest == data + 1
- *  // *smallest == {0,7}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/min_element.html 
- */
-template <typename ForwardIterator, typename BinaryPredicate>
-ForwardIterator min_element(ForwardIterator first, ForwardIterator last,
-                            BinaryPredicate comp);
-
-
-/*! \p max_element finds the largest element in the range <tt>[first, last)</tt>.
- *  It returns the first iterator \c i in <tt>[first, last)</tt>
- *  such that no other iterator in <tt>[first, last)</tt> points to a value larger
- *  than \c *i. The return value is \p last if and only if <tt>[first, last)</tt> is an
- *  empty range.
- *
- *  The two versions of \p max_element differ in how they define whether one element is
- *  greater than another. This version compares objects using \c operator<. Specifically,
- *  this version of \p max_element returns the first iterator \c i in <tt>[first, last)</tt>
- *  such that, for every iterator \c j in <tt>[first, last)</tt>, <tt>*i < *j</tt> is
- *  \c false.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the sequence.
- *  \param last  The end of the sequence.
- *  \return An iterator pointing to the largest element of the range <tt>[first, last)</tt>,
- *          if it is not an empty range; \p last, otherwise.
- *
- *  \tparam A Thrust backend system.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \c ForwardIterator's \c value_type is a model of
- *          <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThan Comparable</a>.
- *
- *  \code
- *  #include <thrust/extrema.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  int data[6] = {1, 0, 2, 2, 1, 3};
- *  int *result = thrust::max_element(thrust::host, data, data + 6);
- *
- *  // *result == 3
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/max_element.html 
- */
-template<typename DerivedPolicy, typename ForwardIterator>
-ForwardIterator max_element(const thrust::detail::execution_policy_base<DerivedPolicy> &exec, ForwardIterator first, ForwardIterator last);
-
-
-/*! \p max_element finds the largest element in the range <tt>[first, last)</tt>.
- *  It returns the first iterator \c i in <tt>[first, last)</tt>
- *  such that no other iterator in <tt>[first, last)</tt> points to a value larger
- *  than \c *i. The return value is \p last if and only if <tt>[first, last)</tt> is an
- *  empty range.
- *
- *  The two versions of \p max_element differ in how they define whether one element is
- *  greater than another. This version compares objects using \c operator<. Specifically,
- *  this version of \p max_element returns the first iterator \c i in <tt>[first, last)</tt>
- *  such that, for every iterator \c j in <tt>[first, last)</tt>, <tt>*i < *j</tt> is
- *  \c false.
- *
- *  \param first The beginning of the sequence.
- *  \param last  The end of the sequence.
- *  \return An iterator pointing to the largest element of the range <tt>[first, last)</tt>,
- *          if it is not an empty range; \p last, otherwise.
- *
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \c ForwardIterator's \c value_type is a model of
- *          <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThan Comparable</a>.
- *
- *  \code
- *  #include <thrust/extrema.h>
- *  ...
- *  int data[6] = {1, 0, 2, 2, 1, 3};
- *  int *result = thrust::max_element(data, data + 6);
- *
- *  // *result == 3
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/max_element.html 
- */
-template <typename ForwardIterator>
-ForwardIterator max_element(ForwardIterator first, ForwardIterator last);
-
-
-/*! \p max_element finds the largest element in the range <tt>[first, last)</tt>.
- *  It returns the first iterator \c i in <tt>[first, last)</tt>
- *  such that no other iterator in <tt>[first, last)</tt> points to a value larger
- *  than \c *i. The return value is \p last if and only if <tt>[first, last)</tt> is an
- *  empty range.
- *
- *  The two versions of \p max_element differ in how they define whether one element is
- *  less than another. This version compares objects using a function object \p comp.
- *  Specifically, this version of \p max_element returns the first iterator \c i in <tt>[first, last)</tt>
- *  such that, for every iterator \c j in <tt>[first, last)</tt>, <tt>comp(*i, *j)</tt> is
- *  \c false.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the sequence.
- *  \param last  The end of the sequence.
- *  \param comp A binary predicate used for comparison.
- *  \return An iterator pointing to the largest element of the range <tt>[first, last)</tt>,
- *          if it is not an empty range; \p last, otherwise.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \p ForwardIterator's \c value_type is convertible to both \p comp's
- *          \c first_argument_type and \c second_argument_type.
- *  \tparam BinaryPredicate is a model of <a href="http://www.sgi.com/tech/stl/BinaryPredicate.html">Binary Predicate</a>.
- *
- *  The following code snippet demonstrates how to use \p max_element to find the largest element
- *  of a collection of key-value pairs using the \p thrust::host execution policy for parallelization.
- *
- *  \code
- *  #include <thrust/extrema.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *
- *  struct key_value
- *  {
- *    int key;
- *    int value;
- *  };
- *
- *  struct compare_key_value
- *  {
- *    __host__ __device__
- *    bool operator()(key_value lhs, key_value rhs)
- *    {
- *      return lhs.key < rhs.key;
- *    }
- *  };
- *
- *  ...
- *  key_value data[4] = { {4,5}, {0,7}, {2,3}, {6,1} };
- *
- *  key_value *largest = thrust::max_element(thrust::host, data, data + 4, compare_key_value());
- *
- *  // largest == data + 3
- *  // *largest == {6,1}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/max_element.html 
- */
-template<typename DerivedPolicy, typename ForwardIterator, typename BinaryPredicate>
-ForwardIterator max_element(const thrust::detail::execution_policy_base<DerivedPolicy> &exec, ForwardIterator first, ForwardIterator last, BinaryPredicate comp);
-
-
-/*! \p max_element finds the largest element in the range <tt>[first, last)</tt>.
- *  It returns the first iterator \c i in <tt>[first, last)</tt>
- *  such that no other iterator in <tt>[first, last)</tt> points to a value larger
- *  than \c *i. The return value is \p last if and only if <tt>[first, last)</tt> is an
- *  empty range.
- *
- *  The two versions of \p max_element differ in how they define whether one element is
- *  less than another. This version compares objects using a function object \p comp.
- *  Specifically, this version of \p max_element returns the first iterator \c i in <tt>[first, last)</tt>
- *  such that, for every iterator \c j in <tt>[first, last)</tt>, <tt>comp(*i, *j)</tt> is
- *  \c false.
- *
- *  \param first The beginning of the sequence.
- *  \param last  The end of the sequence.
- *  \param comp A binary predicate used for comparison.
- *  \return An iterator pointing to the largest element of the range <tt>[first, last)</tt>,
- *          if it is not an empty range; \p last, otherwise.
- *
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \p ForwardIterator's \c value_type is convertible to both \p comp's
- *          \c first_argument_type and \c second_argument_type.
- *  \tparam BinaryPredicate is a model of <a href="http://www.sgi.com/tech/stl/BinaryPredicate.html">Binary Predicate</a>.
- *
- *  The following code snippet demonstrates how to use \p max_element to find the largest element
- *  of a collection of key-value pairs.
- *
- *  \code
- *  #include <thrust/extrema.h>
- *
- *  struct key_value
- *  {
- *    int key;
- *    int value;
- *  };
- *
- *  struct compare_key_value
- *  {
- *    __host__ __device__
- *    bool operator()(key_value lhs, key_value rhs)
- *    {
- *      return lhs.key < rhs.key;
- *    }
- *  };
- *
- *  ...
- *  key_value data[4] = { {4,5}, {0,7}, {2,3}, {6,1} };
- *
- *  key_value *largest = thrust::max_element(data, data + 4, compare_key_value());
- *
- *  // largest == data + 3
- *  // *largest == {6,1}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/max_element.html 
- */
-template <typename ForwardIterator, typename BinaryPredicate>
-ForwardIterator max_element(ForwardIterator first, ForwardIterator last,
-                            BinaryPredicate comp);
-
-
-/*! \p minmax_element finds the smallest and largest elements in the range <tt>[first, last)</tt>.
- *  It returns a pair of iterators <tt>(imin, imax)</tt> where \c imin is the same iterator
- *  returned by \p min_element and \c imax is the same iterator returned by \p max_element.
- *  This function is potentially more efficient than separate calls to \p min_element and \p max_element.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the sequence.
- *  \param last  The end of the sequence.
- *  \return A pair of iterator pointing to the smallest and largest elements of the range <tt>[first, last)</tt>,
- *          if it is not an empty range; \p last, otherwise.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \c ForwardIterator's \c value_type is a model of
- *          <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThan Comparable</a>.
- *
- *  \code
- *  #include <thrust/extrema.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  int data[6] = {1, 0, 2, 2, 1, 3};
- *  thrust::pair<int *, int *> result = thrust::minmax_element(thrust::host, data, data + 6);
- *
- *  // result.first is data + 1
- *  // result.second is data + 5
- *  // *result.first is 0
- *  // *result.second is 3
- *  \endcode
- *
- *  \see min_element
- *  \see max_element
- *  \see http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2005/n1840.pdf
- */
-template<typename DerivedPolicy, typename ForwardIterator>
-thrust::pair<ForwardIterator,ForwardIterator> minmax_element(const thrust::detail::execution_policy_base<DerivedPolicy> &exec, ForwardIterator first, ForwardIterator last);
-
-
-/*! \p minmax_element finds the smallest and largest elements in the range <tt>[first, last)</tt>.
- *  It returns a pair of iterators <tt>(imin, imax)</tt> where \c imin is the same iterator
- *  returned by \p min_element and \c imax is the same iterator returned by \p max_element.
- *  This function is potentially more efficient than separate calls to \p min_element and \p max_element.
- *
- *  \param first The beginning of the sequence.
- *  \param last  The end of the sequence.
- *  \return A pair of iterator pointing to the smallest and largest elements of the range <tt>[first, last)</tt>,
- *          if it is not an empty range; \p last, otherwise.
- *
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \c ForwardIterator's \c value_type is a model of
- *          <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThan Comparable</a>.
- *
- *  \code
- *  #include <thrust/extrema.h>
- *  ...
- *  int data[6] = {1, 0, 2, 2, 1, 3};
- *  thrust::pair<int *, int *> result = thrust::minmax_element(data, data + 6);
- *
- *  // result.first is data + 1
- *  // result.second is data + 5
- *  // *result.first is 0
- *  // *result.second is 3
- *  \endcode
- *
- *  \see min_element
- *  \see max_element
- *  \see http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2005/n1840.pdf
- */
-template <typename ForwardIterator>
-thrust::pair<ForwardIterator,ForwardIterator> minmax_element(ForwardIterator first, 
-                                                             ForwardIterator last);
-
-
-/*! \p minmax_element finds the smallest and largest elements in the range <tt>[first, last)</tt>.
- *  It returns a pair of iterators <tt>(imin, imax)</tt> where \c imin is the same iterator
- *  returned by \p min_element and \c imax is the same iterator returned by \p max_element.
- *  This function is potentially more efficient than separate calls to \p min_element and \p max_element.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the sequence.
- *  \param last  The end of the sequence.
- *  \param comp A binary predicate used for comparison.
- *  \return A pair of iterator pointing to the smallest and largest elements of the range <tt>[first, last)</tt>,
- *          if it is not an empty range; \p last, otherwise.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator">Forward Iterator</a>,
- *          and \p ForwardIterator's \c value_type is convertible to both \p comp's
- *          \c first_argument_type and \c second_argument_type.
- *  \tparam BinaryPredicate is a model of <a href="http://www.sgi.com/tech/stl/BinaryPredicate">Binary Predicate</a>.
- *
- *  The following code snippet demonstrates how to use \p minmax_element to find the smallest and largest elements
- *  of a collection of key-value pairs using the \p thrust::host execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/extrema.h>
- *  #include <thrust/pair.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *
- *  struct key_value
- *  {
- *    int key;
- *    int value;
- *  };
- *
- *  struct compare_key_value
- *  {
- *    __host__ __device__
- *    bool operator()(key_value lhs, key_value rhs)
- *    {
- *      return lhs.key < rhs.key;
- *    }
- *  };
- *
- *  ...
- *  key_value data[4] = { {4,5}, {0,7}, {2,3}, {6,1} };
- *
- *  thrust::pair<key_value*,key_value*> extrema = thrust::minmax_element(thrust::host, data, data + 4, compare_key_value());
- *
- *  // extrema.first   == data + 1
- *  // *extrema.first  == {0,7}
- *  // extrema.second  == data + 3
- *  // *extrema.second == {6,1}
- *  \endcode
- *
- *  \see min_element
- *  \see max_element
- *  \see http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2005/n1840.pdf
- */
-template<typename DerivedPolicy, typename ForwardIterator, typename BinaryPredicate>
-thrust::pair<ForwardIterator,ForwardIterator> minmax_element(const thrust::detail::execution_policy_base<DerivedPolicy> &exec, ForwardIterator first, ForwardIterator last, BinaryPredicate comp);
-
-
-/*! \p minmax_element finds the smallest and largest elements in the range <tt>[first, last)</tt>.
- *  It returns a pair of iterators <tt>(imin, imax)</tt> where \c imin is the same iterator
- *  returned by \p min_element and \c imax is the same iterator returned by \p max_element.
- *  This function is potentially more efficient than separate calls to \p min_element and \p max_element.
- *
- *  \param first The beginning of the sequence.
- *  \param last  The end of the sequence.
- *  \param comp A binary predicate used for comparison.
- *  \return A pair of iterator pointing to the smallest and largest elements of the range <tt>[first, last)</tt>,
- *          if it is not an empty range; \p last, otherwise.
- *
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator">Forward Iterator</a>,
- *          and \p ForwardIterator's \c value_type is convertible to both \p comp's
- *          \c first_argument_type and \c second_argument_type.
- *  \tparam BinaryPredicate is a model of <a href="http://www.sgi.com/tech/stl/BinaryPredicate">Binary Predicate</a>.
- *
- *  The following code snippet demonstrates how to use \p minmax_element to find the smallest and largest elements
- *  of a collection of key-value pairs.
- *
- *  \code
- *  #include <thrust/extrema.h>
- *  #include <thrust/pair.h>
- *
- *  struct key_value
- *  {
- *    int key;
- *    int value;
- *  };
- *
- *  struct compare_key_value
- *  {
- *    __host__ __device__
- *    bool operator()(key_value lhs, key_value rhs)
- *    {
- *      return lhs.key < rhs.key;
- *    }
- *  };
- *
- *  ...
- *  key_value data[4] = { {4,5}, {0,7}, {2,3}, {6,1} };
- *
- *  thrust::pair<key_value*,key_value*> extrema = thrust::minmax_element(data, data + 4, compare_key_value());
- *
- *  // extrema.first   == data + 1
- *  // *extrema.first  == {0,7}
- *  // extrema.second  == data + 3
- *  // *extrema.second == {6,1}
- *  \endcode
- *
- *  \see min_element
- *  \see max_element
- *  \see http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2005/n1840.pdf
- */
-template <typename ForwardIterator, typename BinaryPredicate>
-thrust::pair<ForwardIterator,ForwardIterator> minmax_element(ForwardIterator first, 
-                                                             ForwardIterator last,
-                                                             BinaryPredicate comp);
-
-/*! \} // end extrema
- *  \} // end reductions
- */
-
-} // end thrust
-
-#include <thrust/detail/extrema.inl>
-#include <thrust/detail/minmax.h>
-
diff --git a/compat/thrust/fill.h b/compat/thrust/fill.h
deleted file mode 100644
index b492cec9dd..0000000000
--- a/compat/thrust/fill.h
+++ /dev/null
@@ -1,205 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file fill.h
- *  \brief Fills a range with a constant value
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/execution_policy.h>
-
-namespace thrust
-{
-
-
-/*! \addtogroup transformations
- *  \addtogroup filling
- *  \ingroup transformations
- *  \{
- */
-
-
-/*! \p fill assigns the value \p value to every element in
- *  the range <tt>[first, last)</tt>. That is, for every
- *  iterator \c i in <tt>[first, last)</tt>, it performs
- *  the assignment <tt>*i = value</tt>.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the sequence.
- *  \param last The end of the sequence.
- *  \param value The value to be copied.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \p ForwardIterator is mutable.
- *  \tparam T is a model of <a href="http://www.sgi.com/tech/stl/Assignable.html">Assignable</a>,
- *          and \p T's \c value_type is convertible to \p ForwardIterator's \c value_type.
- *
- *  The following code snippet demonstrates how to use \p fill to set a thrust::device_vector's
- *  elements to a given value using the \p thrust::device execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/fill.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  thrust::device_vector<int> v(4);
- *  thrust::fill(thrust::device, v.begin(), v.end(), 137);
- *
- *  // v[0] == 137, v[1] == 137, v[2] == 137, v[3] == 137
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/fill.html
- *  \see \c fill_n
- *  \see \c uninitialized_fill
- */
-template<typename DerivedPolicy, typename ForwardIterator, typename T>
-  void fill(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-            ForwardIterator first,
-            ForwardIterator last,
-            const T &value);
-
-
-/*! \p fill assigns the value \p value to every element in
- *  the range <tt>[first, last)</tt>. That is, for every
- *  iterator \c i in <tt>[first, last)</tt>, it performs
- *  the assignment <tt>*i = value</tt>.
- *
- *  \param first The beginning of the sequence.
- *  \param last The end of the sequence.
- *  \param value The value to be copied.
- *
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \p ForwardIterator is mutable.
- *  \tparam T is a model of <a href="http://www.sgi.com/tech/stl/Assignable.html">Assignable</a>,
- *          and \p T's \c value_type is convertible to \p ForwardIterator's \c value_type.
- *
- *  The following code snippet demonstrates how to use \p fill to set a thrust::device_vector's
- *  elements to a given value.
- *
- *  \code
- *  #include <thrust/fill.h>
- *  #include <thrust/device_vector.h>
- *  ...
- *  thrust::device_vector<int> v(4);
- *  thrust::fill(v.begin(), v.end(), 137);
- *
- *  // v[0] == 137, v[1] == 137, v[2] == 137, v[3] == 137
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/fill.html
- *  \see \c fill_n
- *  \see \c uninitialized_fill
- */
-template<typename ForwardIterator, typename T>
-  void fill(ForwardIterator first,
-            ForwardIterator last,
-            const T &value);
-
-
-/*! \p fill_n assigns the value \p value to every element in
- *  the range <tt>[first, first+n)</tt>. That is, for every
- *  iterator \c i in <tt>[first, first+n)</tt>, it performs
- *  the assignment <tt>*i = value</tt>.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the sequence.
- *  \param n The size of the sequence.
- *  \param value The value to be copied.
- *  \return <tt>first + n</tt>
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam T is a model of <a href="http://www.sgi.com/tech/stl/Assignable.html">Assignable</a>,
- *          and \p T's \c value_type is convertible to a type in \p OutputIterator's set of \c value_type.
- *
- *  The following code snippet demonstrates how to use \p fill to set a thrust::device_vector's
- *  elements to a given value using the \p thrust::device execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/fill.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  thrust::device_vector<int> v(4);
- *  thrust::fill_n(thrust::device, v.begin(), v.size(), 137);
- *
- *  // v[0] == 137, v[1] == 137, v[2] == 137, v[3] == 137
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/fill_n.html
- *  \see \c fill
- *  \see \c uninitialized_fill_n
- */
-template<typename DerivedPolicy, typename OutputIterator, typename Size, typename T>
-  OutputIterator fill_n(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                        OutputIterator first,
-                        Size n,
-                        const T &value);
-
-
-/*! \p fill_n assigns the value \p value to every element in
- *  the range <tt>[first, first+n)</tt>. That is, for every
- *  iterator \c i in <tt>[first, first+n)</tt>, it performs
- *  the assignment <tt>*i = value</tt>.
- *
- *  \param first The beginning of the sequence.
- *  \param n The size of the sequence.
- *  \param value The value to be copied.
- *  \return <tt>first + n</tt>
- *
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam T is a model of <a href="http://www.sgi.com/tech/stl/Assignable.html">Assignable</a>,
- *          and \p T's \c value_type is convertible to a type in \p OutputIterator's set of \c value_type.
- *
- *  The following code snippet demonstrates how to use \p fill to set a thrust::device_vector's
- *  elements to a given value.
- *
- *  \code
- *  #include <thrust/fill.h>
- *  #include <thrust/device_vector.h>
- *  ...
- *  thrust::device_vector<int> v(4);
- *  thrust::fill_n(v.begin(), v.size(), 137);
- *
- *  // v[0] == 137, v[1] == 137, v[2] == 137, v[3] == 137
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/fill_n.html
- *  \see \c fill
- *  \see \c uninitialized_fill_n
- */
-template<typename OutputIterator, typename Size, typename T>
-  OutputIterator fill_n(OutputIterator first,
-                        Size n,
-                        const T &value);
-
-
-/*! \} // end filling
- *  \} // transformations
- */
-
-} // end namespace thrust
-
-#include <thrust/detail/fill.inl>
-
diff --git a/compat/thrust/find.h b/compat/thrust/find.h
deleted file mode 100644
index fa01ded500..0000000000
--- a/compat/thrust/find.h
+++ /dev/null
@@ -1,382 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file find.h
- *  \brief Locating values in (unsorted) ranges
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/execution_policy.h>
-
-namespace thrust
-{
-
-
-/*! \addtogroup algorithms
- */
-
-/*! \addtogroup searching
- *  \ingroup algorithms
- *  \{
- */
-
-
-/*! \p find returns the first iterator \c i in the range 
- *  <tt>[first, last)</tt> such that <tt>*i == value</tt>
- *  or \c last if no such iterator exists.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first Beginning of the sequence to search.
- *  \param last End of the sequence to search.
- *  \param value The value to find.
- *  \return The first iterator \c i such that <tt>*i == value</tt> or \c last.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *          and \p InputIterator's \c value_type is equality comparable to type \c T.
- *  \tparam T is a model of <a href="http://www.sgi.com/tech/stl/EqualityComparable.html">EqualityComparable</a>.
- *
- *  \code
- *  #include <thrust/find.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  thrust::device_vector<int> input(4);
- *
- *  input[0] = 0;
- *  input[1] = 5;
- *  input[2] = 3;
- *  input[3] = 7;
- *
- *  thrust::device_vector<int>::iterator iter;
- *
- *  iter = thrust::find(thrust::device, input.begin(), input.end(), 3); // returns input.begin() + 2
- *  iter = thrust::find(thrust::device, input.begin(), input.end(), 5); // returns input.begin() + 1
- *  iter = thrust::find(thrust::device, input.begin(), input.end(), 9); // returns input.end()
- *  \endcode
- *
- *  \see find_if
- *  \see mismatch
- */
-template<typename DerivedPolicy, typename InputIterator, typename T>
-InputIterator find(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                   InputIterator first,
-                   InputIterator last,
-                   const T& value);
-
-
-/*! \p find returns the first iterator \c i in the range 
- *  <tt>[first, last)</tt> such that <tt>*i == value</tt>
- *  or \c last if no such iterator exists.
- *
- *  \param first Beginning of the sequence to search.
- *  \param last End of the sequence to search.
- *  \param value The value to find.
- *  \return The first iterator \c i such that <tt>*i == value</tt> or \c last.
- *
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *          and \p InputIterator's \c value_type is equality comparable to type \c T.
- *  \tparam T is a model of <a href="http://www.sgi.com/tech/stl/EqualityComparable.html">EqualityComparable</a>.
- *
- *  \code
- *  #include <thrust/find.h>
- *  #include <thrust/device_vector.h>
- *  ...
- *  thrust::device_vector<int> input(4);
- *
- *  input[0] = 0;
- *  input[1] = 5;
- *  input[2] = 3;
- *  input[3] = 7;
- *
- *  thrust::device_vector<int>::iterator iter;
- *
- *  iter = thrust::find(input.begin(), input.end(), 3); // returns input.begin() + 2
- *  iter = thrust::find(input.begin(), input.end(), 5); // returns input.begin() + 1
- *  iter = thrust::find(input.begin(), input.end(), 9); // returns input.end()
- *  \endcode
- *
- *  \see find_if
- *  \see mismatch
- */
-template <typename InputIterator, typename T>
-InputIterator find(InputIterator first,
-                   InputIterator last,
-                   const T& value);
-
-
-/*! \p find_if returns the first iterator \c i in the range 
- *  <tt>[first, last)</tt> such that <tt>pred(*i)</tt> is \c true
- *  or \c last if no such iterator exists.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first Beginning of the sequence to search.
- *  \param last End of the sequence to search.
- *  \param pred A predicate used to test range elements.
- *  \return The first iterator \c i such that <tt>pred(*i)</tt> is \c true, or \c last.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *
- *  \code
- *  #include <thrust/find.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *
- *  struct greater_than_four
- *  {
- *    __host__ __device__
- *    bool operator()(int x)
- *    {
- *      return x > 4;
- *    }
- *  };
- *
- *  struct greater_than_ten
- *  {
- *    __host__ __device__
- *    bool operator()(int x)
- *    {
- *      return x > 10;
- *    }
- *  };
- *
- *  ...
- *  thrust::device_vector<int> input(4);
- *
- *  input[0] = 0;
- *  input[1] = 5;
- *  input[2] = 3;
- *  input[3] = 7;
- *
- *  thrust::device_vector<int>::iterator iter;
- *
- *  iter = thrust::find_if(thrust::device, input.begin(), input.end(), greater_than_four()); // returns input.begin() + 1
- *
- *  iter = thrust::find_if(thrust::device, input.begin(), input.end(), greater_than_ten());  // returns input.end()
- *  \endcode
- *
- *  \see find
- *  \see find_if_not
- *  \see mismatch
- */
-template<typename DerivedPolicy, typename InputIterator, typename Predicate>
-InputIterator find_if(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                      InputIterator first,
-                      InputIterator last,
-                      Predicate pred);
-
-
-/*! \p find_if returns the first iterator \c i in the range 
- *  <tt>[first, last)</tt> such that <tt>pred(*i)</tt> is \c true
- *  or \c last if no such iterator exists.
- *
- *  \param first Beginning of the sequence to search.
- *  \param last End of the sequence to search.
- *  \param pred A predicate used to test range elements.
- *  \return The first iterator \c i such that <tt>pred(*i)</tt> is \c true, or \c last.
- *
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *
- *  \code
- *  #include <thrust/find.h>
- *  #include <thrust/device_vector.h>
- *
- *  struct greater_than_four
- *  {
- *    __host__ __device__
- *    bool operator()(int x)
- *    {
- *      return x > 4;
- *    }
- *  };
- *
- *  struct greater_than_ten
- *  {
- *    __host__ __device__
- *    bool operator()(int x)
- *    {
- *      return x > 10;
- *    }
- *  };
- *
- *  ...
- *  thrust::device_vector<int> input(4);
- *
- *  input[0] = 0;
- *  input[1] = 5;
- *  input[2] = 3;
- *  input[3] = 7;
- *
- *  thrust::device_vector<int>::iterator iter;
- *
- *  iter = thrust::find_if(input.begin(), input.end(), greater_than_four()); // returns input.begin() + 1
- *
- *  iter = thrust::find_if(input.begin(), input.end(), greater_than_ten());  // returns input.end()
- *  \endcode
- *
- *  \see find
- *  \see find_if_not
- *  \see mismatch
- */
-template <typename InputIterator, typename Predicate>
-InputIterator find_if(InputIterator first,
-                      InputIterator last,
-                      Predicate pred);
-
-
-/*! \p find_if_not returns the first iterator \c i in the range 
- *  <tt>[first, last)</tt> such that <tt>pred(*i)</tt> is \c false
- *  or \c last if no such iterator exists.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first Beginning of the sequence to search.
- *  \param last End of the sequence to search.
- *  \param pred A predicate used to test range elements.
- *  \return The first iterator \c i such that <tt>pred(*i)</tt> is \c false, or \c last.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *
- *  \code
- *  #include <thrust/find.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *
- *  struct greater_than_four
- *  {
- *    __host__ __device__
- *    bool operator()(int x)
- *    {
- *      return x > 4;
- *    }
- *  };
- *
- *  struct greater_than_ten
- *  {
- *    __host__ __device__
- *    bool operator()(int x)
- *    {
- *      return x > 10;
- *    }
- *  };
- *
- *  ...
- *  thrust::device_vector<int> input(4);
- *
- *  input[0] = 0;
- *  input[1] = 5;
- *  input[2] = 3;
- *  input[3] = 7;
- *
- *  thrust::device_vector<int>::iterator iter;
- *
- *  iter = thrust::find_if_not(thrust::device, input.begin(), input.end(), greater_than_four()); // returns input.begin()
- *
- *  iter = thrust::find_if_not(thrust::device, input.begin(), input.end(), greater_than_ten());  // returns input.begin()
- *  \endcode
- *
- *  \see find
- *  \see find_if
- *  \see mismatch
- */
-template<typename DerivedPolicy, typename InputIterator, typename Predicate>
-InputIterator find_if_not(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                          InputIterator first,
-                          InputIterator last,
-                          Predicate pred);
-
-
-/*! \p find_if_not returns the first iterator \c i in the range 
- *  <tt>[first, last)</tt> such that <tt>pred(*i)</tt> is \c false
- *  or \c last if no such iterator exists.
- *
- *  \param first Beginning of the sequence to search.
- *  \param last End of the sequence to search.
- *  \param pred A predicate used to test range elements.
- *  \return The first iterator \c i such that <tt>pred(*i)</tt> is \c false, or \c last.
- *
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *
- *  \code
- *  #include <thrust/find.h>
- *  #include <thrust/device_vector.h>
- *
- *  struct greater_than_four
- *  {
- *    __host__ __device__
- *    bool operator()(int x)
- *    {
- *      return x > 4;
- *    }
- *  };
- *
- *  struct greater_than_ten
- *  {
- *    __host__ __device__
- *    bool operator()(int x)
- *    {
- *      return x > 10;
- *    }
- *  };
- *
- *  ...
- *  thrust::device_vector<int> input(4);
- *
- *  input[0] = 0;
- *  input[1] = 5;
- *  input[2] = 3;
- *  input[3] = 7;
- *
- *  thrust::device_vector<int>::iterator iter;
- *
- *  iter = thrust::find_if_not(input.begin(), input.end(), greater_than_four()); // returns input.begin()
- *
- *  iter = thrust::find_if_not(input.begin(), input.end(), greater_than_ten());  // returns input.begin()
- *  \endcode
- *
- *  \see find
- *  \see find_if
- *  \see mismatch
- */
-template <typename InputIterator, typename Predicate>
-InputIterator find_if_not(InputIterator first,
-                          InputIterator last,
-                          Predicate pred);
-
-/*! \} // end searching
- */
-
-
-} // end namespace thrust
-
-#include <thrust/detail/find.inl>
-
diff --git a/compat/thrust/for_each.h b/compat/thrust/for_each.h
deleted file mode 100644
index efab9d8fab..0000000000
--- a/compat/thrust/for_each.h
+++ /dev/null
@@ -1,278 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file for_each.h
- *  \brief Applies a function to each element in a range
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/detail/execution_policy.h>
-
-namespace thrust
-{
-
-
-/*! \addtogroup modifying
- *  \ingroup transformations
- *  \{
- */
-
-
-/*! \p for_each applies the function object \p f to each element
- *  in the range <tt>[first, last)</tt>; \p f's return value, if any,
- *  is ignored. Unlike the C++ Standard Template Library function
- *  <tt>std::for_each</tt>, this version offers no guarantee on
- *  order of execution. For this reason, this version of \p for_each
- *  does not return a copy of the function object.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the sequence.
- *  \param last The end of the sequence.
- *  \param f The function object to apply to the range <tt>[first, last)</tt>.
- *  \return last
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator's \c value_type is convertible to \p UnaryFunction's \c argument_type.
- *  \tparam UnaryFunction is a model of <a href="http://www.sgi.com/tech/stl/UnaryFunction.html">Unary Function</a>,
- *          and \p UnaryFunction does not apply any non-constant operation through its argument.
- *
- *  The following code snippet demonstrates how to use \p for_each to print the elements
- *  of a \p thrust::device_vector using the \p thrust::device parallelization policy:
- *
- *  \code
- *  #include <thrust/for_each.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/execution_policy.h>
- *  #include <cstdio>
- *  ...
- *
- *  struct printf_functor
- *  {
- *    __host__ __device__
- *    void operator()(int x)
- *    {
- *      // note that using printf in a __device__ function requires
- *      // code compiled for a GPU with compute capability 2.0 or
- *      // higher (nvcc --arch=sm_20)
- *      printf("%d\n", x);
- *    }
- *  };
- *  ...
- *  thrust::device_vector<int> d_vec(3);
- *  d_vec[0] = 0; d_vec[1] = 1; d_vec[2] = 2;
- *
- *  thrust::for_each(thrust::device, d_vec.begin(), d_vec.end(), printf_functor());
- *
- *  // 0 1 2 is printed to standard output in some unspecified order
- *  \endcode
- *
- *  \see for_each_n
- *  \see http://www.sgi.com/tech/stl/for_each.html
- */
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename UnaryFunction>
-InputIterator for_each(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                       InputIterator first,
-                       InputIterator last,
-                       UnaryFunction f);
-
-
-/*! \p for_each_n applies the function object \p f to each element
- *  in the range <tt>[first, first + n)</tt>; \p f's return value, if any,
- *  is ignored. Unlike the C++ Standard Template Library function
- *  <tt>std::for_each</tt>, this version offers no guarantee on
- *  order of execution.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the sequence.
- *  \param n The size of the input sequence.
- *  \param f The function object to apply to the range <tt>[first, first + n)</tt>.
- *  \return <tt>first + n</tt>
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator's \c value_type is convertible to \p UnaryFunction's \c argument_type.
- *  \tparam Size is an integral type.
- *  \tparam UnaryFunction is a model of <a href="http://www.sgi.com/tech/stl/UnaryFunction.html">Unary Function</a>,
- *          and \p UnaryFunction does not apply any non-constant operation through its argument.
- *
- *  The following code snippet demonstrates how to use \p for_each_n to print the elements
- *  of a \p device_vector using the \p thrust::device parallelization policy.
- *
- *  \code
- *  #include <thrust/for_each.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/execution_policy.h>
- *  #include <cstdio>
- *
- *  struct printf_functor
- *  {
- *    __host__ __device__
- *    void operator()(int x)
- *    {
- *      // note that using printf in a __device__ function requires
- *      // code compiled for a GPU with compute capability 2.0 or
- *      // higher (nvcc --arch=sm_20)
- *      printf("%d\n", x);
- *    }
- *  };
- *  ...
- *  thrust::device_vector<int> d_vec(3);
- *  d_vec[0] = 0; d_vec[1] = 1; d_vec[2] = 2;
- *
- *  thrust::for_each_n(thrust::device, d_vec.begin(), d_vec.size(), printf_functor());
- *
- *  // 0 1 2 is printed to standard output in some unspecified order
- *  \endcode
- *
- *  \see for_each
- *  \see http://www.sgi.com/tech/stl/for_each.html
- */
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename Size,
-         typename UnaryFunction>
-InputIterator for_each_n(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                         InputIterator first,
-                         Size n,
-                         UnaryFunction f);
-
-/*! \p for_each applies the function object \p f to each element
- *  in the range <tt>[first, last)</tt>; \p f's return value, if any,
- *  is ignored. Unlike the C++ Standard Template Library function
- *  <tt>std::for_each</tt>, this version offers no guarantee on
- *  order of execution. For this reason, this version of \p for_each
- *  does not return a copy of the function object.
- *
- *  \param first The beginning of the sequence.
- *  \param last The end of the sequence.
- *  \param f The function object to apply to the range <tt>[first, last)</tt>.
- *  \return last
- *
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator's \c value_type is convertible to \p UnaryFunction's \c argument_type.
- *  \tparam UnaryFunction is a model of <a href="http://www.sgi.com/tech/stl/UnaryFunction.html">Unary Function</a>,
- *          and \p UnaryFunction does not apply any non-constant operation through its argument.
- *
- *  The following code snippet demonstrates how to use \p for_each to print the elements
- *  of a \p device_vector.
- *
- *  \code
- *  #include <thrust/for_each.h>
- *  #include <thrust/device_vector.h>
- *  #include <stdio.h>
- *
- *  struct printf_functor
- *  {
- *    __host__ __device__
- *    void operator()(int x)
- *    {
- *      // note that using printf in a __device__ function requires
- *      // code compiled for a GPU with compute capability 2.0 or
- *      // higher (nvcc --arch=sm_20)
- *      printf("%d\n", x);
- *    }
- *  };
- *  ...
- *  thrust::device_vector<int> d_vec(3);
- *  d_vec[0] = 0; d_vec[1] = 1; d_vec[2] = 2;
- *
- *  thrust::for_each(d_vec.begin(), d_vec.end(), printf_functor());
- *
- *  // 0 1 2 is printed to standard output in some unspecified order
- *  \endcode
- *
- *  \see for_each_n
- *  \see http://www.sgi.com/tech/stl/for_each.html
- */
-template<typename InputIterator,
-         typename UnaryFunction>
-InputIterator for_each(InputIterator first,
-                       InputIterator last,
-                       UnaryFunction f);
-
-
-/*! \p for_each_n applies the function object \p f to each element
- *  in the range <tt>[first, first + n)</tt>; \p f's return value, if any,
- *  is ignored. Unlike the C++ Standard Template Library function
- *  <tt>std::for_each</tt>, this version offers no guarantee on
- *  order of execution.
- *
- *  \param first The beginning of the sequence.
- *  \param n The size of the input sequence.
- *  \param f The function object to apply to the range <tt>[first, first + n)</tt>.
- *  \return <tt>first + n</tt>
- *
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator's \c value_type is convertible to \p UnaryFunction's \c argument_type.
- *  \tparam Size is an integral type.
- *  \tparam UnaryFunction is a model of <a href="http://www.sgi.com/tech/stl/UnaryFunction.html">Unary Function</a>,
- *          and \p UnaryFunction does not apply any non-constant operation through its argument.
- *
- *  The following code snippet demonstrates how to use \p for_each_n to print the elements
- *  of a \p device_vector.
- *
- *  \code
- *  #include <thrust/for_each.h>
- *  #include <thrust/device_vector.h>
- *  #include <stdio.h>
- *
- *  struct printf_functor
- *  {
- *    __host__ __device__
- *    void operator()(int x)
- *    {
- *      // note that using printf in a __device__ function requires
- *      // code compiled for a GPU with compute capability 2.0 or
- *      // higher (nvcc --arch=sm_20)
- *      printf("%d\n", x);
- *    }
- *  };
- *  ...
- *  thrust::device_vector<int> d_vec(3);
- *  d_vec[0] = 0; d_vec[1] = 1; d_vec[2] = 2;
- *
- *  thrust::for_each_n(d_vec.begin(), d_vec.size(), printf_functor());
- *
- *  // 0 1 2 is printed to standard output in some unspecified order
- *  \endcode
- *
- *  \see for_each
- *  \see http://www.sgi.com/tech/stl/for_each.html
- */
-template<typename InputIterator,
-         typename Size,
-         typename UnaryFunction>
-InputIterator for_each_n(InputIterator first,
-                         Size n,
-                         UnaryFunction f);
-
-/*! \} // end modifying
- */
-
-} // end namespace thrust
-
-#include <thrust/detail/for_each.inl>
-
diff --git a/compat/thrust/functional.h b/compat/thrust/functional.h
deleted file mode 100644
index b3d47f9179..0000000000
--- a/compat/thrust/functional.h
+++ /dev/null
@@ -1,1079 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file functional.h
- *  \brief Function objects and tools for manipulating them
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <functional>
-#include <thrust/detail/functional/placeholder.h>
-
-namespace thrust
-{
-
-/*! \addtogroup function_objects Function Objects
- */
-
-template<typename Operation> struct unary_traits;
-
-template<typename Operation> struct binary_traits;
-
-/*! \addtogroup function_object_adaptors Function Object Adaptors
- *  \ingroup function_objects
- *  \{
- */
-
-/*! \p unary_function is an empty base class: it contains no member functions
- *  or member variables, but only type information. The only reason it exists
- *  is to make it more convenient to define types that are models of the
- *  concept Adaptable Unary Function. Specifically, any model of Adaptable
- *  Unary Function must define nested \c typedefs. Those \c typedefs are
- *  provided by the base class \p unary_function.
- *
- *  The following code snippet demonstrates how to construct an 
- *  Adaptable Unary Function using \p unary_function.
- *
- *  \code
- *  struct sine : public thrust::unary_function<float,float>
- *  {
- *    __host__ __device__
- *    float operator()(float x) { return sinf(x); }
- *  };
- *  \endcode
- *
- *  \note unary_function is currently redundant with the C++ STL type
- *  \c std::unary_function. We reserve it here for potential additional
- *  functionality at a later date.
- *
- *  \see http://www.sgi.com/tech/stl/unary_function.html
- *  \see binary_function
- */
-template<typename Argument,
-         typename Result>
-  struct unary_function
-    : public std::unary_function<Argument, Result>
-{
-}; // end unary_function
-
-/*! \p binary_function is an empty base class: it contains no member functions
- *  or member variables, but only type information. The only reason it exists
- *  is to make it more convenient to define types that are models of the
- *  concept Adaptable Binary Function. Specifically, any model of Adaptable
- *  Binary Function must define nested \c typedefs. Those \c typedefs are
- *  provided by the base class \p binary_function.
- *
- *  The following code snippet demonstrates how to construct an 
- *  Adaptable Binary Function using \p binary_function.
- *
- *  \code
- *  struct exponentiate : public thrust::binary_function<float,float,float>
- *  {
- *    __host__ __device__
- *    float operator()(float x, float y) { return powf(x,y); }
- *  };
- *  \endcode
- *
- *  \note binary_function is currently redundant with the C++ STL type
- *  \c std::binary_function. We reserve it here for potential additional
- *  functionality at a later date.
- *
- *  \see http://www.sgi.com/tech/stl/binary_function.html
- *  \see unary_function
- */
-template<typename Argument1,
-         typename Argument2,
-         typename Result>
-  struct binary_function
-    : public std::binary_function<Argument1, Argument2, Result>
-{
-}; // end binary_function
-
-/*! \}
- */
-
-
-/*! \addtogroup predefined_function_objects Predefined Function Objects
- *  \ingroup function_objects
- */
-
-/*! \addtogroup arithmetic_operations Arithmetic Operations
- *  \ingroup predefined_function_objects
- *  \{
- */
-
-/*! \p plus is a function object. Specifically, it is an Adaptable Binary Function.
- *  If \c f is an object of class <tt>plus<T></tt>, and \c x and \c y are objects
- *  of class \c T, then <tt>f(x,y)</tt> returns <tt>x+y</tt>.
- *
- *  \tparam T is a model of <a href="http://www.sgi.com/tech/stl/Assignable.html">Assignable</a>,
- *          and if \c x and \c y are objects of type \p T, then <tt>x+y</tt> must be defined and must have a return type that is convertible to \c T.
- *
- *  The following code snippet demonstrates how to use <tt>plus</tt> to sum two
- *  device_vectors of \c floats.
- *
- *  \code
- *  #include <thrust/device_vector.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/sequence.h>
- *  #include <thrust/fill.h>
- *  #include <thrust/transform.h>
- *  ...
- *  const int N = 1000;
- *  thrust::device_vector<float> V1(N);
- *  thrust::device_vector<float> V2(N);
- *  thrust::device_vector<float> V3(N);
- *
- *  thrust::sequence(V1.begin(), V1.end(), 1);
- *  thrust::fill(V2.begin(), V2.end(), 75);
- *
- *  thrust::transform(V1.begin(), V1.end(), V2.begin(), V3.begin(),
- *                     thrust::plus<float>());
- *  // V3 is now {76, 77, 78, ..., 1075}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/plus.html
- *  \see binary_function
- */
-template<typename T>
-  struct plus : public binary_function<T,T,T>
-{
-  /*! Function call operator. The return value is <tt>lhs + rhs</tt>.
-   */
-  __host__ __device__ T operator()(const T &lhs, const T &rhs) const {return lhs + rhs;}
-}; // end plus
-
-/*! \p minus is a function object. Specifically, it is an Adaptable Binary Function.
- *  If \c f is an object of class <tt>minus<T></tt>, and \c x and \c y are objects
- *  of class \c T, then <tt>f(x,y)</tt> returns <tt>x-y</tt>.
- *
- *  \tparam T is a model of <a href="http://www.sgi.com/tech/stl/Assignable.html">Assignable</a>,
- *          and if \c x and \c y are objects of type \p T, then <tt>x-y</tt> must be defined and must have a return type that is convertible to \c T.
- *
- *  The following code snippet demonstrates how to use <tt>minus</tt> to subtract
- *  a device_vector of \c floats from another.
- *
- *  \code
- *  #include <thrust/device_vector.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/sequence.h>
- *  #include <thrust/fill.h>
- *  #include <thrust/transform.h>
- *  ...
- *  const int N = 1000;
- *  thrust::device_vector<float> V1(N);
- *  thrust::device_vector<float> V2(N);
- *  thrust::device_vector<float> V3(N);
- *
- *  thrust::sequence(V1.begin(), V1.end(), 1);
- *  thrust::fill(V2.begin(), V2.end(), 75);
- *
- *  thrust::transform(V1.begin(), V1.end(), V2.begin(), V3.begin(),
- *                     thrust::minus<float>());
- *  // V3 is now {-74, -73, -72, ..., 925}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/minus.html
- *  \see binary_function
- */
-template<typename T>
-  struct minus : public binary_function<T,T,T>
-{
-  /*! Function call operator. The return value is <tt>lhs - rhs</tt>.
-   */
-  __host__ __device__ T operator()(const T &lhs, const T &rhs) const {return lhs - rhs;}
-}; // end minus
-
-/*! \p multiplies is a function object. Specifically, it is an Adaptable Binary Function.
- *  If \c f is an object of class <tt>multiplies<T></tt>, and \c x and \c y are objects
- *  of class \c T, then <tt>f(x,y)</tt> returns <tt>x*y</tt>.
- *
- *  \tparam T is a model of <a href="http://www.sgi.com/tech/stl/Assignable.html">Assignable</a>,
- *          and if \c x and \c y are objects of type \p T, then <tt>x*y</tt> must be defined and must have a return type that is convertible to \c T.
- *
- *  The following code snippet demonstrates how to use <tt>multiplies</tt> to multiply
- *  two device_vectors of \c floats.
- *
- *  \code
- *  #include <thrust/device_vector.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/sequence.h>
- *  #include <thrust/fill.h>
- *  #include <thrust/transform.h>
- *  ...
- *  const int N = 1000;
- *  thrust::device_vector<float> V1(N);
- *  thrust::device_vector<float> V2(N);
- *  thrust::device_vector<float> V3(N);
- *
- *  thrust::sequence(V1.begin(), V1.end(), 1);
- *  thrust::fill(V2.begin(), V2.end(), 75);
- *
- *  thrust::transform(V1.begin(), V1.end(), V2.begin(), V3.begin(),
- *                     thrust::multiplies<float>());
- *  // V3 is now {75, 150, 225, ..., 75000}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/multiplies.html
- *  \see binary_function
- */
-template<typename T>
-  struct multiplies : public binary_function<T,T,T>
-{
-  /*! Function call operator. The return value is <tt>lhs * rhs</tt>.
-   */
-  __host__ __device__ T operator()(const T &lhs, const T &rhs) const {return lhs * rhs;}
-}; // end multiplies
-
-/*! \p divides is a function object. Specifically, it is an Adaptable Binary Function.
- *  If \c f is an object of class <tt>divides<T></tt>, and \c x and \c y are objects
- *  of class \c T, then <tt>f(x,y)</tt> returns <tt>x/y</tt>.
- *
- *  \tparam T is a model of <a href="http://www.sgi.com/tech/stl/Assignable.html">Assignable</a>,
- *          and if \c x and \c y are objects of type \p T, then <tt>x/y</tt> must be defined and must have a return type that is convertible to \c T.
- *
- *  The following code snippet demonstrates how to use <tt>divides</tt> to divide
- *  one device_vectors of \c floats by another.
- *
- *  \code
- *  #include <thrust/device_vector.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/sequence.h>
- *  #include <thrust/fill.h>
- *  #include <thrust/transform.h>
- *  ...
- *  const int N = 1000;
- *  thrust::device_vector<float> V1(N);
- *  thrust::device_vector<float> V2(N);
- *  thrust::device_vector<float> V3(N);
- *
- *  thrust::sequence(V1.begin(), V1.end(), 1);
- *  thrust::fill(V2.begin(), V2.end(), 75);
- *
- *  thrust::transform(V1.begin(), V1.end(), V2.begin(), V3.begin(),
- *                     thrust::divides<float>());
- *  // V3 is now {1/75, 2/75, 3/75, ..., 1000/75}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/divides.html
- *  \see binary_function
- */
-template<typename T>
-  struct divides : public binary_function<T,T,T>
-{
-  /*! Function call operator. The return value is <tt>lhs / rhs</tt>.
-   */
-  __host__ __device__ T operator()(const T &lhs, const T &rhs) const {return lhs / rhs;}
-}; // end divides
-
-/*! \p modulus is a function object. Specifically, it is an Adaptable Binary Function.
- *  If \c f is an object of class <tt>divides<T></tt>, and \c x and \c y are objects
- *  of class \c T, then <tt>f(x,y)</tt> returns <tt>x%y</tt>.
- *
- *  \tparam T is a model of <a href="http://www.sgi.com/tech/stl/Assignable.html">Assignable</a>,
- *          and if \c x and \c y are objects of type \p T, then <tt>x%y</tt> must be defined and must have a return type that is convertible to \c T.
- *
- *  The following code snippet demonstrates how to use <tt>modulus</tt> to take
- *  the modulus of one device_vectors of \c floats by another.
- *
- *  \code
- *  #include <thrust/device_vector.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/sequence.h>
- *  #include <thrust/fill.h>
- *  #include <thrust/transform.h>
- *  ...
- *  const int N = 1000;
- *  thrust::device_vector<float> V1(N);
- *  thrust::device_vector<float> V2(N);
- *  thrust::device_vector<float> V3(N);
- *
- *  thrust::sequence(V1.begin(), V1.end(), 1);
- *  thrust::fill(V2.begin(), V2.end(), 75);
- *
- *  thrust::transform(V1.begin(), V1.end(), V2.begin(), V3.begin(),
- *                     thrust::modulus<int>());
- *  // V3 is now {1%75, 2%75, 3%75, ..., 1000%75}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/modulus.html
- *  \see binary_function
- */
-template<typename T>
-  struct modulus : public binary_function<T,T,T>
-{
-  /*! Function call operator. The return value is <tt>lhs % rhs</tt>.
-   */
-  __host__ __device__ T operator()(const T &lhs, const T &rhs) const {return lhs % rhs;}
-}; // end modulus
-
-/*! \p negate is a function object. Specifically, it is an Adaptable Unary Function.
- *  If \c f is an object of class <tt>negate<T></tt>, and \c x is an object
- *  of class \c T, then <tt>f(x)</tt> returns <tt>-x</tt>.
- *
- *  \tparam T is a model of <a href="http://www.sgi.com/tech/stl/Assignable.html">Assignable</a>,
- *          and if \c x is an object of type \p T, then <tt>-x</tt> must be defined and must have a return type that is convertible to \c T.
- *
- *  The following code snippet demonstrates how to use <tt>negate</tt> to negate
- *  the element of a device_vector of \c floats.
- *
- *  \code
- *  #include <thrust/device_vector.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/sequence.h>
- *  #include <thrust/transform.h>
- *  ...
- *  const int N = 1000;
- *  thrust::device_vector<float> V1(N);
- *  thrust::device_vector<float> V2(N);
- *
- *  thrust::sequence(V1.begin(), V1.end(), 1);
- *
- *  thrust::transform(V1.begin(), V1.end(), V2.begin(),
- *                     thrust::negate<float>());
- *  // V2 is now {-1, -2, -3, ..., -1000}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/negate.html
- *  \see unary_function
- */
-template<typename T>
-  struct negate : public unary_function<T,T>
-{
-  /*! Function call operator. The return value is <tt>-x</tt>.
-   */
-  __host__ __device__ T operator()(const T &x) const {return -x;}
-}; // end negate
-
-/*! \}
- */
-
-/*! \addtogroup comparison_operations Comparison Operations
- *  \ingroup predefined_function_objects
- *  \{
- */
-
-/*! \p equal_to is a function object. Specifically, it is an Adaptable Binary
- *  Predicate, which means it is a function object that tests the truth or falsehood
- *  of some condition. If \c f is an object of class <tt>equal_to<T></tt> and \c x
- *  and \c y are objects of class \c T, then <tt>f(x,y)</tt> returns \c true if
- *  <tt>x == y</tt> and \c false otherwise.
- *
- *  \tparam T is a model of <a href="http://www.sgi.com/tech/stl/EqualityComparable.html">Equality Comparable</a>.
- *
- *  \see http://www.sgi.com/tech/stl/equal_to.html
- *  \see binary_function
- */
-template<typename T>
-  struct equal_to : public binary_function<T,T,bool>
-{
-  /*! Function call operator. The return value is <tt>lhs == rhs</tt>.
-   */
-  __host__ __device__ bool operator()(const T &lhs, const T &rhs) const {return lhs == rhs;}
-}; // end equal_to
-
-/*! \p not_equal_to is a function object. Specifically, it is an Adaptable Binary
- *  Predicate, which means it is a function object that tests the truth or falsehood
- *  of some condition. If \c f is an object of class <tt>not_equal_to<T></tt> and \c x
- *  and \c y are objects of class \c T, then <tt>f(x,y)</tt> returns \c true if
- *  <tt>x != y</tt> and \c false otherwise.
- *
- *  \tparam T is a model of <a href="http://www.sgi.com/tech/stl/EqualityComparable.html">Equality Comparable</a>.
- *
- *  \see http://www.sgi.com/tech/stl/not_equal_to.html
- *  \see binary_function
- */
-template<typename T>
-  struct not_equal_to : public binary_function<T,T,bool>
-{
-  /*! Function call operator. The return value is <tt>lhs != rhs</tt>.
-   */
-  __host__ __device__ bool operator()(const T &lhs, const T &rhs) const {return lhs != rhs;}
-}; // end not_equal_to
-
-/*! \p greater is a function object. Specifically, it is an Adaptable Binary
- *  Predicate, which means it is a function object that tests the truth or falsehood
- *  of some condition. If \c f is an object of class <tt>greater<T></tt> and \c x
- *  and \c y are objects of class \c T, then <tt>f(x,y)</tt> returns \c true if
- *  <tt>x > y</tt> and \c false otherwise.
- *
- *  \tparam T is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThan Comparable</a>.
- *
- *  \see http://www.sgi.com/tech/stl/greater.html
- *  \see binary_function
- */
-template<typename T>
-  struct greater : public binary_function<T,T,bool>
-{
-  /*! Function call operator. The return value is <tt>lhs > rhs</tt>.
-   */
-  __host__ __device__ bool operator()(const T &lhs, const T &rhs) const {return lhs > rhs;}
-}; // end greater
-
-/*! \p less is a function object. Specifically, it is an Adaptable Binary
- *  Predicate, which means it is a function object that tests the truth or falsehood
- *  of some condition. If \c f is an object of class <tt>less<T></tt> and \c x
- *  and \c y are objects of class \c T, then <tt>f(x,y)</tt> returns \c true if
- *  <tt>x < y</tt> and \c false otherwise.
- *
- *  \tparam T is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThan Comparable</a>.
- *
- *  \see http://www.sgi.com/tech/stl/less.html
- *  \see binary_function
- */
-template<typename T>
-  struct less : public binary_function<T,T,bool>
-{
-  /*! Function call operator. The return value is <tt>lhs < rhs</tt>.
-   */
-  __host__ __device__ bool operator()(const T &lhs, const T &rhs) const {return lhs < rhs;}
-}; // end less
-
-/*! \p greater_equal is a function object. Specifically, it is an Adaptable Binary
- *  Predicate, which means it is a function object that tests the truth or falsehood
- *  of some condition. If \c f is an object of class <tt>greater_equal<T></tt> and \c x
- *  and \c y are objects of class \c T, then <tt>f(x,y)</tt> returns \c true if
- *  <tt>x >= y</tt> and \c false otherwise.
- *
- *  \tparam T is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThan Comparable</a>.
- *
- *  \see http://www.sgi.com/tech/stl/greater_equal.html
- *  \see binary_function
- */
-template<typename T>
-  struct greater_equal : public binary_function<T,T,bool>
-{
-  /*! Function call operator. The return value is <tt>lhs >= rhs</tt>.
-   */
-  __host__ __device__ bool operator()(const T &lhs, const T &rhs) const {return lhs >= rhs;}
-}; // end greater_equal
-
-/*! \p less_equal is a function object. Specifically, it is an Adaptable Binary
- *  Predicate, which means it is a function object that tests the truth or falsehood
- *  of some condition. If \c f is an object of class <tt>less_equal<T></tt> and \c x
- *  and \c y are objects of class \c T, then <tt>f(x,y)</tt> returns \c true if
- *  <tt>x <= y</tt> and \c false otherwise.
- *
- *  \tparam T is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThan Comparable</a>.
- *
- *  \see http://www.sgi.com/tech/stl/less_equal.html
- *  \see binary_function
- */
-template<typename T>
-  struct less_equal : public binary_function<T,T,bool>
-{
-  /*! Function call operator. The return value is <tt>lhs <= rhs</tt>.
-   */
-  __host__ __device__ bool operator()(const T &lhs, const T &rhs) const {return lhs <= rhs;}
-}; // end less_equal
-
-/*! \}
- */
-
-
-/*! \addtogroup logical_operations Logical Operations
- *  \ingroup predefined_function_objects
- *  \{
- */
-
-/*! \p logical_and is a function object. Specifically, it is an Adaptable Binary Predicate,
- *  which means it is a function object that tests the truth or falsehood of some condition.
- *  If \c f is an object of class <tt>logical_and<T></tt> and \c x and \c y are objects of
- *  class \c T (where \c T is convertible to \c bool) then <tt>f(x,y)</tt> returns \c true
- *  if and only if both \c x and \c y are \c true.
- *
- *  \tparam T must be convertible to \c bool.
- *
- *  \see http://www.sgi.com/tech/stl/logical_and.html
- *  \see binary_function
- */
-template<typename T>
-  struct logical_and : public binary_function<T,T,bool>
-{
-  /*! Function call operator. The return value is <tt>lhs && rhs</tt>.
-   */
-  __host__ __device__ bool operator()(const T &lhs, const T &rhs) const {return lhs && rhs;}
-}; // end logical_and
-
-/*! \p logical_or is a function object. Specifically, it is an Adaptable Binary Predicate,
- *  which means it is a function object that tests the truth or falsehood of some condition.
- *  If \c f is an object of class <tt>logical_or<T></tt> and \c x and \c y are objects of
- *  class \c T (where \c T is convertible to \c bool) then <tt>f(x,y)</tt> returns \c true
- *  if and only if either \c x or \c y are \c true.
- *
- *  \tparam T must be convertible to \c bool.
- *
- *  \see http://www.sgi.com/tech/stl/logical_or.html
- *  \see binary_function
- */
-template<typename T>
-  struct logical_or : public binary_function<T,T,bool>
-{
-  /*! Function call operator. The return value is <tt>lhs || rhs</tt>.
-   */
-  __host__ __device__ bool operator()(const T &lhs, const T &rhs) const {return lhs || rhs;}
-}; // end logical_or
-
-/*! \p logical_not is a function object. Specifically, it is an Adaptable Predicate,
- *  which means it is a function object that tests the truth or falsehood of some condition.
- *  If \c f is an object of class <tt>logical_not<T></tt> and \c x is an object of
- *  class \c T (where \c T is convertible to \c bool) then <tt>f(x)</tt> returns \c true
- *  if and only if \c x is \c false.
- *
- *  \tparam T must be convertible to \c bool.
- *
- *  The following code snippet demonstrates how to use \p logical_not to transform
- *  a device_vector of \c bools into its logical complement.
- *
- *  \code
- *  #include <thrust/device_vector.h>
- *  #include <thrust/transform.h>
- *  #include <thrust/functional.h>
- *  ...
- *  thrust::device_vector<bool> V;
- *  ...
- *  thrust::transform(V.begin(), V.end(), V.begin(), thrust::logical_not<bool>());
- *  // The elements of V are now the logical complement of what they were prior
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/logical_not.html
- *  \see unary_function
- */
-template<typename T>
-  struct logical_not : public unary_function<T,bool>
-{
-  /*! Function call operator. The return value is <tt>!x</tt>.
-   */
-  __host__ __device__ bool operator()(const T &x) const {return !x;}
-}; // end logical_not
-
-/*! \}
- */
-
-/*! \addtogroup bitwise_operations Bitwise Operations
- *  \ingroup predefined_function_objects
- *  \{
- */
-
-/*! \p bit_and is a function object. Specifically, it is an Adaptable Binary Function.
- *  If \c f is an object of class <tt>bit_and<T></tt>, and \c x and \c y are objects
- *  of class \c T, then <tt>f(x,y)</tt> returns <tt>x&y</tt>.
- *
- *  \tparam T is a model of <a href="http://www.sgi.com/tech/stl/Assignable.html">Assignable</a>,
- *          and if \c x and \c y are objects of type \p T, then <tt>x&y</tt> must be defined and must have a return type that is convertible to \c T.
- *
- *  The following code snippet demonstrates how to use <tt>bit_and</tt> to take
- *  the bitwise AND of one device_vector of \c ints by another.
- *
- *  \code
- *  #include <thrust/device_vector.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/sequence.h>
- *  #include <thrust/fill.h>
- *  #include <thrust/transform.h>
- *  ...
- *  const int N = 1000;
- *  thrust::device_vector<int> V1(N);
- *  thrust::device_vector<int> V2(N);
- *  thrust::device_vector<int> V3(N);
- *
- *  thrust::sequence(V1.begin(), V1.end(), 1);
- *  thrust::fill(V2.begin(), V2.end(), 13);
- *
- *  thrust::transform(V1.begin(), V1.end(), V2.begin(), V3.begin(),
- *                    thrust::bit_and<int>());
- *  // V3 is now {1&13, 2&13, 3&13, ..., 1000%13}
- *  \endcode
- *
- *  \see binary_function
- */
-template<typename T>
-  struct bit_and : public binary_function<T,T,T>
-{
-  /*! Function call operator. The return value is <tt>lhs & rhs</tt>.
-   */
-  __host__ __device__ T operator()(const T &lhs, const T &rhs) const {return lhs & rhs;}
-}; // end bit_and
-
-/*! \p bit_or is a function object. Specifically, it is an Adaptable Binary Function.
- *  If \c f is an object of class <tt>bit_and<T></tt>, and \c x and \c y are objects
- *  of class \c T, then <tt>f(x,y)</tt> returns <tt>x|y</tt>.
- *
- *  \tparam T is a model of <a href="http://www.sgi.com/tech/stl/Assignable.html">Assignable</a>,
- *          and if \c x and \c y are objects of type \p T, then <tt>x|y</tt> must be defined and must have a return type that is convertible to \c T.
- *
- *  The following code snippet demonstrates how to use <tt>bit_or</tt> to take
- *  the bitwise OR of one device_vector of \c ints by another.
- *
- *  \code
- *  #include <thrust/device_vector.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/sequence.h>
- *  #include <thrust/fill.h>
- *  #include <thrust/transform.h>
- *  ...
- *  const int N = 1000;
- *  thrust::device_vector<int> V1(N);
- *  thrust::device_vector<int> V2(N);
- *  thrust::device_vector<int> V3(N);
- *
- *  thrust::sequence(V1.begin(), V1.end(), 1);
- *  thrust::fill(V2.begin(), V2.end(), 13);
- *
- *  thrust::transform(V1.begin(), V1.end(), V2.begin(), V3.begin(),
- *                    thrust::bit_or<int>());
- *  // V3 is now {1|13, 2|13, 3|13, ..., 1000|13}
- *  \endcode
- *
- *  \see binary_function
- */
-template<typename T>
-  struct bit_or : public binary_function<T,T,T>
-{
-  /*! Function call operator. The return value is <tt>lhs | rhs</tt>.
-   */
-  __host__ __device__ T operator()(const T &lhs, const T &rhs) const {return lhs | rhs;}
-}; // end bit_or
-
-/*! \p bit_xor is a function object. Specifically, it is an Adaptable Binary Function.
- *  If \c f is an object of class <tt>bit_and<T></tt>, and \c x and \c y are objects
- *  of class \c T, then <tt>f(x,y)</tt> returns <tt>x^y</tt>.
- *
- *  \tparam T is a model of <a href="http://www.sgi.com/tech/stl/Assignable.html">Assignable</a>,
- *          and if \c x and \c y are objects of type \p T, then <tt>x^y</tt> must be defined and must have a return type that is convertible to \c T.
- *
- *  The following code snippet demonstrates how to use <tt>bit_xor</tt> to take
- *  the bitwise XOR of one device_vector of \c ints by another.
- *
- *  \code
- *  #include <thrust/device_vector.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/sequence.h>
- *  #include <thrust/fill.h>
- *  #include <thrust/transform.h>
- *  ...
- *  const int N = 1000;
- *  thrust::device_vector<int> V1(N);
- *  thrust::device_vector<int> V2(N);
- *  thrust::device_vector<int> V3(N);
- *
- *  thrust::sequence(V1.begin(), V1.end(), 1);
- *  thrust::fill(V2.begin(), V2.end(), 13);
- *
- *  thrust::transform(V1.begin(), V1.end(), V2.begin(), V3.begin(),
- *                    thrust::bit_xor<int>());
- *  // V3 is now {1^13, 2^13, 3^13, ..., 1000^13}
- *  \endcode
- *
- *  \see binary_function
- */
-template<typename T>
-  struct bit_xor : public binary_function<T,T,T>
-{
-  /*! Function call operator. The return value is <tt>lhs ^ rhs</tt>.
-   */
-  __host__ __device__ T operator()(const T &lhs, const T &rhs) const {return lhs ^ rhs;}
-}; // end bit_xor
-
-/*! \}
- */
-
-/*! \addtogroup generalized_identity_operations Generalized Identity Operations
- *  \ingroup predefined_function_objects
- *  \{
- */
-
-/*! \p identity is a Unary Function that represents the identity function: it takes
- *  a single argument \c x, and returns \c x.
- *
- *  \tparam T No requirements on \p T.
- *
- *  The following code snippet demonstrates that \p identity returns its
- *  argument.
- *
- *  \code
- *  #include <thrust/functional.h>
- *  #include <assert.h>
- *  ...
- *  int x = 137;
- *  thrust::identity<int> id;
- *  assert(x == id(x));
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/identity.html
- *  \see unary_function
- */
-template<typename T>
-  struct identity : public unary_function<T,T>
-{
-  /*! Function call operator. The return value is <tt>x</tt>.
-   */
-  __host__ __device__ const T &operator()(const T &x) const {return x;}
-}; // end identity
-
-/*! \p maximum is a function object that takes two arguments and returns the greater
- *  of the two. Specifically, it is an Adaptable Binary Function. If \c f is an
- *  object of class <tt>maximum<T></tt> and \c x and \c y are objects of class \c T
- *  <tt>f(x,y)</tt> returns \c x if <tt>x > y</tt> and \c y, otherwise.
- *
- *  \tparam T is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThan Comparable</a>.
- *
- *  The following code snippet demonstrates that \p maximum returns its
- *  greater argument.
- *
- *  \code
- *  #include <thrust/functional.h>
- *  #include <assert.h>
- *  ...
- *  int x =  137;
- *  int y = -137;
- *  thrust::maximum<int> mx;
- *  assert(x == mx(x,y));
- *  \endcode
- *
- *  \see minimum
- *  \see min
- *  \see binary_function
- */
-template<typename T>
-  struct maximum : public binary_function<T,T,T>
-{
-  /*! Function call operator. The return value is <tt>rhs < lhs ? lhs : rhs</tt>.
-   */
-  __host__ __device__ T operator()(const T &lhs, const T &rhs) const {return lhs < rhs ? rhs : lhs;}
-}; // end maximum
-
-/*! \p minimum is a function object that takes two arguments and returns the lesser
- *  of the two. Specifically, it is an Adaptable Binary Function. If \c f is an
- *  object of class <tt>minimum<T></tt> and \c x and \c y are objects of class \c T
- *  <tt>f(x,y)</tt> returns \c x if <tt>x < y</tt> and \c y, otherwise.
- *
- *  \tparam T is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThan Comparable</a>.
- *
- *  The following code snippet demonstrates that \p minimum returns its
- *  lesser argument.
- *
- *  \code
- *  #include <thrust/functional.h>
- *  #include <assert.h>
- *  ...
- *  int x =  137;
- *  int y = -137;
- *  thrust::minimum<int> mn;
- *  assert(y == mn(x,y));
- *  \endcode
- *
- *  \see maximum
- *  \see max
- *  \see binary_function
- */
-template<typename T>
-  struct minimum : public binary_function<T,T,T>
-{
-  /*! Function call operator. The return value is <tt>lhs < rhs ? lhs : rhs</tt>.
-   */
-  __host__ __device__ T operator()(const T &lhs, const T &rhs) const {return lhs < rhs ? lhs : rhs;}
-}; // end minimum
-
-/*! \p project1st is a function object that takes two arguments and returns 
- *  its first argument; the second argument is unused. It is essentially a
- *  generalization of identity to the case of a Binary Function.
- *
- *  \code
- *  #include <thrust/functional.h>
- *  #include <assert.h>
- *  ...
- *  int x =  137;
- *  int y = -137;
- *  thrust::project1st<int> pj1;
- *  assert(x == pj1(x,y));
- *  \endcode
- *
- *  \see identity
- *  \see project2nd
- *  \see binary_function
- */
-template<typename T1, typename T2>
-  struct project1st : public binary_function<T1,T2,T1>
-{
-  /*! Function call operator. The return value is <tt>lhs</tt>.
-   */
-  __host__ __device__ const T1 &operator()(const T1 &lhs, const T2 &rhs) const {return lhs;}
-}; // end project1st
-
-/*! \p project2nd is a function object that takes two arguments and returns 
- *  its second argument; the first argument is unused. It is essentially a
- *  generalization of identity to the case of a Binary Function.
- *
- *  \code
- *  #include <thrust/functional.h>
- *  #include <assert.h>
- *  ...
- *  int x =  137;
- *  int y = -137;
- *  thrust::project2nd<int> pj2;
- *  assert(y == pj2(x,y));
- *  \endcode
- *
- *  \see identity
- *  \see project1st
- *  \see binary_function
- */
-template<typename T1, typename T2>
-  struct project2nd : public binary_function<T1,T2,T2>
-{
-  /*! Function call operator. The return value is <tt>rhs</tt>.
-   */
-  __host__ __device__ const T2 &operator()(const T1 &lhs, const T2 &rhs) const {return rhs;}
-}; // end project2nd
-
-/*! \}
- */
-
-
-// odds and ends
-
-/*! \addtogroup function_object_adaptors
- *  \{
- */
-
-/*! \p unary_negate is a function object adaptor: it is an Adaptable Predicate
- *  that represents the logical negation of some other Adaptable Predicate.
- *  That is: if \c f is an object of class <tt>unary_negate<AdaptablePredicate></tt>,
- *  then there exists an object \c pred of class \c AdaptablePredicate such
- *  that <tt>f(x)</tt> always returns the same value as <tt>!pred(x)</tt>.
- *  There is rarely any reason to construct a <tt>unary_negate</tt> directly;
- *  it is almost always easier to use the helper function not1.
- *
- *  \see http://www.sgi.com/tech/stl/unary_negate.html
- *  \see not1
- */
-template<typename Predicate>
-struct unary_negate 
-    : public thrust::unary_function<typename Predicate::argument_type, bool>
-{
-  /*! Constructor takes a \p Predicate object to negate.
-   *  \param p The \p Predicate object to negate.
-   */
-  __host__ __device__
-  explicit unary_negate(Predicate p) : pred(p){}
-
-  /*! Function call operator. The return value is <tt>!pred(x)</tt>.
-   */
-  __host__ __device__
-  bool operator()(const typename Predicate::argument_type& x) { return !pred(x); }
-
-  /*! \cond */
-  Predicate pred;
-  /*! \endcond */
-}; // end unary_negate
-
-/*! \p not1 is a helper function to simplify the creation of Adaptable Predicates:
- *  it takes an Adaptable Predicate \p pred as an argument and returns a new Adaptable
- *  Predicate that represents the negation of \p pred. That is: if \c pred is an object
- *  of a type which models Adaptable Predicate, then the the type of the result
- *  \c npred of <tt>not1(pred)</tt> is also a model of Adaptable Predicate and
- *  <tt>npred(x)</tt> always returns the same value as <tt>!pred(x)</tt>.
- *
- *  \param pred The Adaptable Predicate to negate.
- *  \return A new object, <tt>npred</tt> such that <tt>npred(x)</tt> always returns
- *          the same value as <tt>!pred(x)</tt>.
- *
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/stl/AdaptablePredicate.html">Adaptable Predicate</a>.
- *
- *  \see unary_negate
- *  \see not2
- */
-template<typename Predicate>
-  __host__ __device__
-  unary_negate<Predicate> not1(const Predicate &pred);
-
-/*! \p binary_negate is a function object adaptor: it is an Adaptable Binary 
- *  Predicate that represents the logical negation of some other Adaptable
- *  Binary Predicate. That is: if \c f is an object of class <tt>binary_negate<AdaptablePredicate></tt>,
- *  then there exists an object \c pred of class \c AdaptableBinaryPredicate
- *  such that <tt>f(x,y)</tt> always returns the same value as <tt>!pred(x,y)</tt>.
- *  There is rarely any reason to construct a <tt>binary_negate</tt> directly;
- *  it is almost always easier to use the helper function not2.
- *
- *  \see http://www.sgi.com/tech/stl/binary_negate.html
- */
-template<typename Predicate>
-struct binary_negate
-    : public thrust::binary_function<typename Predicate::first_argument_type,
-                                     typename Predicate::second_argument_type,
-                                     bool>
-{
-  /*! Constructor takes a \p Predicate object to negate.
-   *  \param p The \p Predicate object to negate.
-   */
-  __host__ __device__
-  explicit binary_negate(Predicate p) : pred(p){}
-
-  /*! Function call operator. The return value is <tt>!pred(x,y)</tt>.
-   */
-  __host__ __device__
-  bool operator()(const typename Predicate::first_argument_type& x, const typename Predicate::second_argument_type& y)
-  { 
-      return !pred(x,y); 
-  }
-
-  /*! \cond */
-  Predicate pred;
-  /*! \endcond */
-}; // end binary_negate
-
-/*! \p not2 is a helper function to simplify the creation of Adaptable Binary Predicates:
- *  it takes an Adaptable Binary Predicate \p pred as an argument and returns a new Adaptable
- *  Binary Predicate that represents the negation of \p pred. That is: if \c pred is an object
- *  of a type which models Adaptable Binary Predicate, then the the type of the result
- *  \c npred of <tt>not2(pred)</tt> is also a model of Adaptable Binary Predicate and
- *  <tt>npred(x,y)</tt> always returns the same value as <tt>!pred(x,y)</tt>.
- *
- *  \param pred The Adaptable Binary Predicate to negate.
- *  \return A new object, <tt>npred</tt> such that <tt>npred(x,y)</tt> always returns
- *          the same value as <tt>!pred(x,y)</tt>.
- *
- *  \tparam Binary Predicate is a model of <a href="http://www.sgi.com/tech/stl/AdaptableBinaryPredicate.html">Adaptable Binary Predicate</a>.
- *
- *  \see binary_negate
- *  \see not1
- */
-template<typename BinaryPredicate>
-  __host__ __device__
-  binary_negate<BinaryPredicate> not2(const BinaryPredicate &pred);
-
-/*! \}
- */
-
-
-/*! \addtogroup placeholder_objects Placeholder Objects
- *  \ingroup function_objects
- *  \{
- */
-
-
-/*! \namespace placeholders
- *  \brief Facilities for constructing simple functions inline.
- *
- *  Objects in the \p thrust::placeholders namespace may be used to create simple arithmetic functions inline
- *  in an algorithm invocation. Combining placeholders such as \p _1 and \p _2 with arithmetic operations such as \c +
- *  creates an unnamed function object which applies the operation to their arguments.
- *
- *  The type of placeholder objects is implementation-defined.
- *
- *  The following code snippet demonstrates how to use the placeholders \p _1 and \p _2 with \p thrust::transform
- *  to implement the SAXPY computation:
- *
- *  \code
- *  #include <thrust/device_vector.h>
- *  #include <thrust/transform.h>
- *  #include <thrust/functional.h>
- *
- *  int main()
- *  {
- *    thrust::device_vector<float> x(4), y(4);
- *    x[0] = 1;
- *    x[1] = 2;
- *    x[2] = 3;
- *    x[3] = 4;
- *    
- *    y[0] = 1;
- *    y[1] = 1;
- *    y[2] = 1;
- *    y[3] = 1;
- *
- *    float a = 2.0f;
- *
- *    using namespace thrust::placeholders;
- *
- *    thrust::transform(x.begin(), x.end(), y.begin(), y.begin(),
- *      a * _1 + 2
- *    );
- *
- *    // y is now {3, 5, 7, 9}
- *  }
- *  \endcode
- */
-namespace placeholders
-{
-
-
-/*! \p thrust::placeholders::_1 is the placeholder for the first function parameter.
- */
-static const thrust::detail::functional::placeholder<0>::type _1;
-
-
-/*! \p thrust::placeholders::_2 is the placeholder for the second function parameter.
- */
-static const thrust::detail::functional::placeholder<1>::type _2;
-
-
-/*! \p thrust::placeholders::_3 is the placeholder for the third function parameter.
- */
-static const thrust::detail::functional::placeholder<2>::type _3;
-
-
-/*! \p thrust::placeholders::_4 is the placeholder for the fourth function parameter.
- */
-static const thrust::detail::functional::placeholder<3>::type _4;
-
-
-/*! \p thrust::placeholders::_5 is the placeholder for the fifth function parameter.
- */
-static const thrust::detail::functional::placeholder<4>::type _5;
-
-
-/*! \p thrust::placeholders::_6 is the placeholder for the sixth function parameter.
- */
-static const thrust::detail::functional::placeholder<5>::type _6;
-
-
-/*! \p thrust::placeholders::_7 is the placeholder for the seventh function parameter.
- */
-static const thrust::detail::functional::placeholder<6>::type _7;
-
-
-/*! \p thrust::placeholders::_8 is the placeholder for the eighth function parameter.
- */
-static const thrust::detail::functional::placeholder<7>::type _8;
-
-
-/*! \p thrust::placeholders::_9 is the placeholder for the ninth function parameter.
- */
-static const thrust::detail::functional::placeholder<8>::type _9;
-
-
-/*! \p thrust::placeholders::_10 is the placeholder for the tenth function parameter.
- */
-static const thrust::detail::functional::placeholder<9>::type _10;
-
-
-} // end placeholders
-
-
-/*! \} // placeholder_objects
- */
-
-
-} // end thrust
-
-#include <thrust/detail/functional.inl>
-#include <thrust/detail/functional/operators.h>
-
diff --git a/compat/thrust/gather.h b/compat/thrust/gather.h
deleted file mode 100644
index f2b8233657..0000000000
--- a/compat/thrust/gather.h
+++ /dev/null
@@ -1,438 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file gather.h
- *  \brief Irregular copying from a source range
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/execution_policy.h>
-
-namespace thrust
-{
-
-
-/*! \addtogroup gathering
- *  \ingroup copying
- *  \{
- */
-
-
-/*! \p gather copies elements from a source array into a destination range according 
- *  to a map. For each input iterator \c i in the range <tt>[map_first, map_last)</tt>, the
- *  value <tt>input_first[\*i]</tt> is assigned to <tt>*(result + (i - map_first))</tt>.
- *  \p RandomAccessIterator must permit random access.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param map_first Beginning of the range of gather locations.
- *  \param map_last End of the range of gather locations.
- *  \param input_first Beginning of the source range.
- *  \param result Beginning of the destination range.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator must be a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a> and \c InputIterator's \c value_type must be convertible to \c RandomAccessIterator's \c difference_type.
- *  \tparam RandomAccessIterator must be a model of <a href="http://www.sgi.com/tech/stl/RandomAccessIterator.html">Random Access Iterator</a> and \c RandomAccessIterator's \c value_type must be convertible to \c OutputIterator's \c value_type.
- *  \tparam OutputIterator must be a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *
- *  \pre The range <tt>[map_first, map_last)</tt> shall not overlap the range <tt>[result, result + (map_last - map_first))</tt>.
- *
- *  \remark \p gather is the inverse of thrust::scatter.
- *
- *  The following code snippet demonstrates how to use \p gather to reorder
- *  a range using the \p thrust::device execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/gather.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  // mark even indices with a 1; odd indices with a 0
- *  int values[10] = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0};
- *  thrust::device_vector<int> d_values(values, values + 10);
- *
- *  // gather all even indices into the first half of the range
- *  // and odd indices to the last half of the range
- *  int map[10]   = {0, 2, 4, 6, 8, 1, 3, 5, 7, 9};
- *  thrust::device_vector<int> d_map(map, map + 10);
- *
- *  thrust::device_vector<int> d_output(10);
- *  thrust::gather(thrust::device,
- *                 d_map.begin(), d_map.end(),
- *                 d_values.begin(),
- *                 d_output.begin());
- *  // d_output is now {1, 1, 1, 1, 1, 0, 0, 0, 0, 0}
- *  \endcode
- */
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename RandomAccessIterator,
-         typename OutputIterator>
-  OutputIterator gather(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                        InputIterator                                               map_first,
-                        InputIterator                                               map_last,
-                        RandomAccessIterator                                        input_first,
-                        OutputIterator                                              result);
-
-
-/*! \p gather copies elements from a source array into a destination range according 
- *  to a map. For each input iterator \c i in the range <tt>[map_first, map_last)</tt>, the
- *  value <tt>input_first[\*i]</tt> is assigned to <tt>*(result + (i - map_first))</tt>.
- *  \p RandomAccessIterator must permit random access.
- *
- *  \param map_first Beginning of the range of gather locations.
- *  \param map_last End of the range of gather locations.
- *  \param input_first Beginning of the source range.
- *  \param result Beginning of the destination range.
- *
- *  \tparam InputIterator must be a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a> and \c InputIterator's \c value_type must be convertible to \c RandomAccessIterator's \c difference_type.
- *  \tparam RandomAccessIterator must be a model of <a href="http://www.sgi.com/tech/stl/RandomAccessIterator.html">Random Access Iterator</a> and \c RandomAccessIterator's \c value_type must be convertible to \c OutputIterator's \c value_type.
- *  \tparam OutputIterator must be a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *
- *  \pre The range <tt>[map_first, map_last)</tt> shall not overlap the range <tt>[result, result + (map_last - map_first))</tt>.
- *
- *  \remark \p gather is the inverse of thrust::scatter.
- *
- *  The following code snippet demonstrates how to use \p gather to reorder
- *  a range.
- *
- *  \code
- *  #include <thrust/gather.h>
- *  #include <thrust/device_vector.h>
- *  ...
- *  // mark even indices with a 1; odd indices with a 0
- *  int values[10] = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0};
- *  thrust::device_vector<int> d_values(values, values + 10);
- *
- *  // gather all even indices into the first half of the range
- *  // and odd indices to the last half of the range
- *  int map[10]   = {0, 2, 4, 6, 8, 1, 3, 5, 7, 9};
- *  thrust::device_vector<int> d_map(map, map + 10);
- *
- *  thrust::device_vector<int> d_output(10);
- *  thrust::gather(d_map.begin(), d_map.end(),
- *                 d_values.begin(),
- *                 d_output.begin());
- *  // d_output is now {1, 1, 1, 1, 1, 0, 0, 0, 0, 0}
- *  \endcode
- */
-template<typename InputIterator,
-         typename RandomAccessIterator,
-         typename OutputIterator>
-  OutputIterator gather(InputIterator        map_first,
-                        InputIterator        map_last,
-                        RandomAccessIterator input_first,
-                        OutputIterator       result);
-
-
-/*! \p gather_if conditionally copies elements from a source array into a destination 
- *  range according to a map. For each input iterator \c i in the range <tt>[map_first, map_last)</tt>,
- *  such that the value of <tt>\*(stencil + (i - map_first))</tt> is \c true, the value
- *  <tt>input_first[\*i]</tt> is assigned to <tt>*(result + (i - map_first))</tt>.
- *  \p RandomAccessIterator must permit random access.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param map_first Beginning of the range of gather locations.
- *  \param map_last End of the range of gather locations.
- *  \param stencil Beginning of the range of predicate values.
- *  \param input_first Beginning of the source range.
- *  \param result Beginning of the destination range.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator1 must be a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a> and \c InputIterator1's \c value_type must be convertible to \c RandomAccessIterator's \c difference_type.
- *  \tparam InputIterator2 must be a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a> and \c InputIterator2's \c value_type must be convertible to \c bool.
- *  \tparam RandomAccessIterator must be a model of <a href="http://www.sgi.com/tech/stl/RandomAccessIterator.html">Random Access iterator</a> and \c RandomAccessIterator's \c value_type must be convertible to \c OutputIterator's \c value_type.
- *  \tparam OutputIterator must be a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *
- *  \pre The range <tt>[map_first, map_last)</tt> shall not overlap the range <tt>[result, result + (map_last - map_first))</tt>.
- *  \pre The range <tt>[stencil, stencil + (map_last - map_first))</tt> shall not overlap the range <tt>[result, result + (map_last - map_first))</tt>.
- *
- *  \remark \p gather_if is the inverse of \p scatter_if.
- *
- *  The following code snippet demonstrates how to use \p gather_if to gather selected values from
- *  an input range using the \p thrust::device execution policy:
- *
- *  \code
- *  #include <thrust/gather.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *
- *  int values[10] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
- *  thrust::device_vector<int> d_values(values, values + 10);
- *
- *  // select elements at even-indexed locations
- *  int stencil[10] = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0};
- *  thrust::device_vector<int> d_stencil(stencil, stencil + 10);
- *
- *  // map all even indices into the first half of the range
- *  // and odd indices to the last half of the range
- *  int map[10]   = {0, 2, 4, 6, 8, 1, 3, 5, 7, 9};
- *  thrust::device_vector<int> d_map(map, map + 10);
- *
- *  thrust::device_vector<int> d_output(10, 7);
- *  thrust::gather_if(thrust::device,
- *                    d_map.begin(), d_map.end(),
- *                    d_stencil.begin(),
- *                    d_values.begin(),
- *                    d_output.begin());
- *  // d_output is now {0, 7, 4, 7, 8, 7, 3, 7, 7, 7}
- *  \endcode
- */
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename RandomAccessIterator,
-         typename OutputIterator>
-  OutputIterator gather_if(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                           InputIterator1                                              map_first,
-                           InputIterator1                                              map_last,
-                           InputIterator2                                              stencil,
-                           RandomAccessIterator                                        input_first,
-                           OutputIterator                                              result);
-
-
-/*! \p gather_if conditionally copies elements from a source array into a destination 
- *  range according to a map. For each input iterator \c i in the range <tt>[map_first, map_last)</tt>,
- *  such that the value of <tt>\*(stencil + (i - map_first))</tt> is \c true, the value
- *  <tt>input_first[\*i]</tt> is assigned to <tt>*(result + (i - map_first))</tt>.
- *  \p RandomAccessIterator must permit random access.
- *
- *  \param map_first Beginning of the range of gather locations.
- *  \param map_last End of the range of gather locations.
- *  \param stencil Beginning of the range of predicate values.
- *  \param input_first Beginning of the source range.
- *  \param result Beginning of the destination range.
- *
- *  \tparam InputIterator1 must be a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a> and \c InputIterator1's \c value_type must be convertible to \c RandomAccessIterator's \c difference_type.
- *  \tparam InputIterator2 must be a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a> and \c InputIterator2's \c value_type must be convertible to \c bool.
- *  \tparam RandomAccessIterator must be a model of <a href="http://www.sgi.com/tech/stl/RandomAccessIterator.html">Random Access iterator</a> and \c RandomAccessIterator's \c value_type must be convertible to \c OutputIterator's \c value_type.
- *  \tparam OutputIterator must be a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *
- *  \pre The range <tt>[map_first, map_last)</tt> shall not overlap the range <tt>[result, result + (map_last - map_first))</tt>.
- *  \pre The range <tt>[stencil, stencil + (map_last - map_first))</tt> shall not overlap the range <tt>[result, result + (map_last - map_first))</tt>.
- *
- *  \remark \p gather_if is the inverse of \p scatter_if.
- *
- *  The following code snippet demonstrates how to use \p gather_if to gather selected values from
- *  an input range.
- *
- *  \code
- *  #include <thrust/gather.h>
- *  #include <thrust/device_vector.h>
- *  ...
- *
- *  int values[10] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
- *  thrust::device_vector<int> d_values(values, values + 10);
- *
- *  // select elements at even-indexed locations
- *  int stencil[10] = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0};
- *  thrust::device_vector<int> d_stencil(stencil, stencil + 10);
- *
- *  // map all even indices into the first half of the range
- *  // and odd indices to the last half of the range
- *  int map[10]   = {0, 2, 4, 6, 8, 1, 3, 5, 7, 9};
- *  thrust::device_vector<int> d_map(map, map + 10);
- *
- *  thrust::device_vector<int> d_output(10, 7);
- *  thrust::gather_if(d_map.begin(), d_map.end(),
- *                    d_stencil.begin(),
- *                    d_values.begin(),
- *                    d_output.begin());
- *  // d_output is now {0, 7, 4, 7, 8, 7, 3, 7, 7, 7}
- *  \endcode
- */
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename RandomAccessIterator,
-         typename OutputIterator>
-  OutputIterator gather_if(InputIterator1       map_first,
-                           InputIterator1       map_last,
-                           InputIterator2       stencil,
-                           RandomAccessIterator input_first,
-                           OutputIterator       result);
-
-
-/*! \p gather_if conditionally copies elements from a source array into a destination 
- *  range according to a map. For each input iterator \c i in the range <tt>[map_first, map_last)</tt>
- *  such that the value of <tt>pred(\*(stencil + (i - map_first)))</tt> is \c true,
- *  the value <tt>input_first[\*i]</tt> is assigned to <tt>*(result + (i - map_first))</tt>.
- *  \p RandomAccessIterator must permit random access.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param map_first Beginning of the range of gather locations.
- *  \param map_last End of the range of gather locations.
- *  \param stencil Beginning of the range of predicate values.
- *  \param input_first Beginning of the source range.
- *  \param result Beginning of the destination range.
- *  \param pred Predicate to apply to the stencil values.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator1 must be a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a> and \c InputIterator1's \c value_type must be convertible to \c RandomAccessIterator's \c difference_type.
- *  \tparam InputIterator2 must be a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a> and \c InputIterator2's \c value_type must be convertible to \c Predicate's \c argument_type.
- *  \tparam RandomAccessIterator must be a model of <a href="http://www.sgi.com/tech/stl/RandomAccessIterator.html">Random Access iterator</a> and \c RandomAccessIterator's \c value_type must be convertible to \c OutputIterator's \c value_type.
- *  \tparam OutputIterator must be a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam Predicate must be a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *
- *  \pre The range <tt>[map_first, map_last)</tt> shall not overlap the range <tt>[result, result + (map_last - map_first))</tt>.
- *  \pre The range <tt>[stencil, stencil + (map_last - map_first))</tt> shall not overlap the range <tt>[result, result + (map_last - map_first))</tt>.
- *
- *  \remark \p gather_if is the inverse of \p scatter_if.
- *
- *  The following code snippet demonstrates how to use \p gather_if to gather selected values from
- *  an input range based on an arbitrary selection function using the \p thrust::device execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/gather.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/execution_policy.h>
- *  
- *  struct is_even
- *  {
- *    __host__ __device__
- *    bool operator()(const int x)
- *    {
- *      return (x % 2) == 0;
- *    }
- *  };
- *  ...
- *
- *  int values[10] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
- *  thrust::device_vector<int> d_values(values, values + 10);
- *
- *  // we will select an element when our stencil is even
- *  int stencil[10] = {0, 3, 4, 1, 4, 1, 2, 7, 8, 9};
- *  thrust::device_vector<int> d_stencil(stencil, stencil + 10);
- *
- *  // map all even indices into the first half of the range
- *  // and odd indices to the last half of the range
- *  int map[10]   = {0, 2, 4, 6, 8, 1, 3, 5, 7, 9};
- *  thrust::device_vector<int> d_map(map, map + 10);
- *
- *  thrust::device_vector<int> d_output(10, 7);
- *  thrust::gather_if(thrust::device,
- *                    d_map.begin(), d_map.end(),
- *                    d_stencil.begin(),
- *                    d_values.begin(),
- *                    d_output.begin(),
- *                    is_even());
- *  // d_output is now {0, 7, 4, 7, 8, 7, 3, 7, 7, 7}
- *  \endcode
- */
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename RandomAccessIterator,
-         typename OutputIterator,
-         typename Predicate>
-  OutputIterator gather_if(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                           InputIterator1                                              map_first,
-                           InputIterator1                                              map_last,
-                           InputIterator2                                              stencil,
-                           RandomAccessIterator                                        input_first,
-                           OutputIterator                                              result,
-                           Predicate                                                   pred);
-
-
-/*! \p gather_if conditionally copies elements from a source array into a destination 
- *  range according to a map. For each input iterator \c i in the range <tt>[map_first, map_last)</tt>
- *  such that the value of <tt>pred(\*(stencil + (i - map_first)))</tt> is \c true,
- *  the value <tt>input_first[\*i]</tt> is assigned to <tt>*(result + (i - map_first))</tt>.
- *  \p RandomAccessIterator must permit random access.
- *
- *  \param map_first Beginning of the range of gather locations.
- *  \param map_last End of the range of gather locations.
- *  \param stencil Beginning of the range of predicate values.
- *  \param input_first Beginning of the source range.
- *  \param result Beginning of the destination range.
- *  \param pred Predicate to apply to the stencil values.
- *
- *  \tparam InputIterator1 must be a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a> and \c InputIterator1's \c value_type must be convertible to \c RandomAccessIterator's \c difference_type.
- *  \tparam InputIterator2 must be a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a> and \c InputIterator2's \c value_type must be convertible to \c Predicate's \c argument_type.
- *  \tparam RandomAccessIterator must be a model of <a href="http://www.sgi.com/tech/stl/RandomAccessIterator.html">Random Access iterator</a> and \c RandomAccessIterator's \c value_type must be convertible to \c OutputIterator's \c value_type.
- *  \tparam OutputIterator must be a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam Predicate must be a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *
- *  \pre The range <tt>[map_first, map_last)</tt> shall not overlap the range <tt>[result, result + (map_last - map_first))</tt>.
- *  \pre The range <tt>[stencil, stencil + (map_last - map_first))</tt> shall not overlap the range <tt>[result, result + (map_last - map_first))</tt>.
- *
- *  \remark \p gather_if is the inverse of \p scatter_if.
- *
- *  The following code snippet demonstrates how to use \p gather_if to gather selected values from
- *  an input range based on an arbitrary selection function.
- *
- *  \code
- *  #include <thrust/gather.h>
- *  #include <thrust/device_vector.h>
- *  
- *  struct is_even
- *  {
- *    __host__ __device__
- *    bool operator()(const int x)
- *    {
- *      return (x % 2) == 0;
- *    }
- *  };
- *  ...
- *
- *  int values[10] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
- *  thrust::device_vector<int> d_values(values, values + 10);
- *
- *  // we will select an element when our stencil is even
- *  int stencil[10] = {0, 3, 4, 1, 4, 1, 2, 7, 8, 9};
- *  thrust::device_vector<int> d_stencil(stencil, stencil + 10);
- *
- *  // map all even indices into the first half of the range
- *  // and odd indices to the last half of the range
- *  int map[10]   = {0, 2, 4, 6, 8, 1, 3, 5, 7, 9};
- *  thrust::device_vector<int> d_map(map, map + 10);
- *
- *  thrust::device_vector<int> d_output(10, 7);
- *  thrust::gather_if(d_map.begin(), d_map.end(),
- *                    d_stencil.begin(),
- *                    d_values.begin(),
- *                    d_output.begin(),
- *                    is_even());
- *  // d_output is now {0, 7, 4, 7, 8, 7, 3, 7, 7, 7}
- *  \endcode
- */
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename RandomAccessIterator,
-         typename OutputIterator,
-         typename Predicate>
-  OutputIterator gather_if(InputIterator1       map_first,
-                           InputIterator1       map_last,
-                           InputIterator2       stencil,
-                           RandomAccessIterator input_first,
-                           OutputIterator       result,
-                           Predicate            pred);
-
-/*! \} // gathering
- */
-
-} // end namespace thrust
-
-#include <thrust/detail/gather.inl>
-
diff --git a/compat/thrust/generate.h b/compat/thrust/generate.h
deleted file mode 100644
index 1d52721a78..0000000000
--- a/compat/thrust/generate.h
+++ /dev/null
@@ -1,211 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file generate.h
- *  \brief Fills a range with values "generated" from a function of no arguments
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/execution_policy.h>
-
-namespace thrust
-{
-
-
-/*! \addtogroup transformations
- *  \{
- */
-
-
-/*! \p generate assigns the result of invoking \p gen, a function object that takes no arguments,
- *  to each element in the range <tt>[first,last)</tt>.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The first element in the range of interest.
- *  \param last The last element in the range of interest.
- *  \param gen A function argument, taking no parameters, used to generate values to assign to
- *             elements in the range <tt>[first,last)</tt>.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \p ForwardIterator is mutable.
- *  \tparam Generator is a model of <a href="http://www.sgi.com/tech/stl/Generator.html">Generator</a>,
- *          and \p Generator's \c result_type is convertible to \p ForwardIterator's \c value_type.
- *
- *  The following code snippet demonstrates how to fill a \c host_vector with random numbers,
- *  using the standard C library function \c rand using the \p thrust::host execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/generate.h>
- *  #include <thrust/host_vector.h>
- *  #include <thrust/execution_policy.h>
- *  #include <cstdlib>
- *  ...
- *  thrust::host_vector<int> v(10);
- *  srand(13);
- *  thrust::generate(thrust::host, v.begin(), v.end(), rand);
- *
- *  // the elements of v are now pseudo-random numbers
- *  \endcode
- *
- *  \see generate_n
- *  \see http://www.sgi.com/tech/stl/generate.html
- */
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename Generator>
-  void generate(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                ForwardIterator first,
-                ForwardIterator last,
-                Generator gen);
-
-
-/*! \p generate assigns the result of invoking \p gen, a function object that takes no arguments,
- *  to each element in the range <tt>[first,last)</tt>.
- *
- *  \param first The first element in the range of interest.
- *  \param last The last element in the range of interest.
- *  \param gen A function argument, taking no parameters, used to generate values to assign to
- *             elements in the range <tt>[first,last)</tt>.
- *
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \p ForwardIterator is mutable.
- *  \tparam Generator is a model of <a href="http://www.sgi.com/tech/stl/Generator.html">Generator</a>,
- *          and \p Generator's \c result_type is convertible to \p ForwardIterator's \c value_type.
- *
- *  The following code snippet demonstrates how to fill a \c host_vector with random numbers,
- *  using the standard C library function \c rand.
- *
- *  \code
- *  #include <thrust/generate.h>
- *  #include <thrust/host_vector.h>
- *  #include <thrust/execution_policy.h>
- *  #include <cstdlib>
- *  ...
- *  thrust::host_vector<int> v(10);
- *  srand(13);
- *  thrust::generate(v.begin(), v.end(), rand);
- *
- *  // the elements of v are now pseudo-random numbers
- *  \endcode
- *
- *  \see generate_n
- *  \see http://www.sgi.com/tech/stl/generate.html
- */
-template<typename ForwardIterator,
-         typename Generator>
-  void generate(ForwardIterator first,
-                ForwardIterator last,
-                Generator gen);
-
-
-/*! \p generate_n assigns the result of invoking \p gen, a function object that takes no arguments,
- *  to each element in the range <tt>[first,first + n)</tt>. The return value is <tt>first + n</tt>.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The first element in the range of interest.
- *  \param n The size of the range of interest.
- *  \param gen A function argument, taking no parameters, used to generate values to assign to
- *             elements in the range <tt>[first,first + n)</tt>.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam Size is an integral type (either signed or unsigned).
- *  \tparam Generator is a model of <a href="http://www.sgi.com/tech/stl/Generator.html">Generator</a>,
- *          and \p Generator's \c result_type is convertible to a type in \p OutputIterator's set of \c value_types.
- *
- *  The following code snippet demonstrates how to fill a \c host_vector with random numbers,
- *  using the standard C library function \c rand using the \p thrust::host execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/generate.h>
- *  #include <thrust/host_vector.h>
- *  #include <thrust/execution_policy.h>
- *  #include <cstdlib>
- *  ...
- *  thrust::host_vector<int> v(10);
- *  srand(13);
- *  thrust::generate_n(thrust::host, v.begin(), 10, rand);
- *
- *  // the elements of v are now pseudo-random numbers
- *  \endcode
- *
- *  \see generate
- *  \see http://www.sgi.com/tech/stl/generate.html
- */
-template<typename DerivedPolicy,
-         typename OutputIterator,
-         typename Size,
-         typename Generator>
-  OutputIterator generate_n(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                            OutputIterator first,
-                            Size n,
-                            Generator gen);
-
-
-/*! \p generate_n assigns the result of invoking \p gen, a function object that takes no arguments,
- *  to each element in the range <tt>[first,first + n)</tt>. The return value is <tt>first + n</tt>.
- *
- *  \param first The first element in the range of interest.
- *  \param n The size of the range of interest.
- *  \param gen A function argument, taking no parameters, used to generate values to assign to
- *             elements in the range <tt>[first,first + n)</tt>.
- *
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam Size is an integral type (either signed or unsigned).
- *  \tparam Generator is a model of <a href="http://www.sgi.com/tech/stl/Generator.html">Generator</a>,
- *          and \p Generator's \c result_type is convertible to a type in \p OutputIterator's set of \c value_types.
- *
- *  The following code snippet demonstrates how to fill a \c host_vector with random numbers,
- *  using the standard C library function \c rand.
- *
- *  \code
- *  #include <thrust/generate.h>
- *  #include <thrust/host_vector.h>
- *  #include <stdlib.h>
- *  ...
- *  thrust::host_vector<int> v(10);
- *  srand(13);
- *  thrust::generate_n(v.begin(), 10, rand);
- *
- *  // the elements of v are now pseudo-random numbers
- *  \endcode
- *
- *  \see generate
- *  \see http://www.sgi.com/tech/stl/generate.html
- */
-template<typename OutputIterator,
-         typename Size,
-         typename Generator>
-  OutputIterator generate_n(OutputIterator first,
-                            Size n,
-                            Generator gen);
-
-
-/*! \} // end transformations
- */
-
-} // end namespace thrust
-
-#include <thrust/detail/generate.inl>
-
diff --git a/compat/thrust/host_vector.h b/compat/thrust/host_vector.h
deleted file mode 100644
index 11b1ae0685..0000000000
--- a/compat/thrust/host_vector.h
+++ /dev/null
@@ -1,424 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file host_vector.h
- *  \brief A dynamically-sizable array of elements which reside in the "host" memory space
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <memory>
-#include <thrust/detail/vector_base.h>
-#include <vector>
-
-namespace thrust
-{
-
-// forward declaration of device_vector
-template<typename T, typename Alloc> class device_vector;
-
-/*! \addtogroup container_classes Container Classes
- *  \addtogroup host_containers Host Containers
- *  \ingroup container_classes
- *  \{
- */
-
-/*! A \p host_vector is a container that supports random access to elements,
- *  constant time removal of elements at the end, and linear time insertion
- *  and removal of elements at the beginning or in the middle. The number of
- *  elements in a \p host_vector may vary dynamically; memory management is
- *  automatic. The memory associated with a \p host_vector resides in the memory
- *  space of the host associated with a parallel device.
- *
- *  \see http://www.sgi.com/tech/stl/Vector.html
- *  \see device_vector
- */
-template<typename T, typename Alloc = std::allocator<T> >
-  class host_vector
-    : public detail::vector_base<T,Alloc>
-{
-  private:
-    typedef detail::vector_base<T,Alloc> Parent;
-
-  public:
-    /*! \cond */
-    typedef typename Parent::size_type  size_type;
-    typedef typename Parent::value_type value_type;
-    /*! \endcond */
-
-    /*! This constructor creates an empty \p host_vector.
-     */
-    __host__
-    host_vector(void)
-      :Parent() {}
-
-    /*! This constructor creates a \p host_vector with the given
-     *  size.
-     *  \param n The number of elements to initially craete.
-     */
-    __host__
-    explicit host_vector(size_type n)
-      :Parent(n) {}
-
-    /*! This constructor creates a \p host_vector with copies
-     *  of an exemplar element.
-     *  \param n The number of elements to initially create.
-     *  \param value An element to copy.
-     */
-    __host__
-    explicit host_vector(size_type n, const value_type &value)
-      :Parent(n,value) {}
-
-    /*! Copy constructor copies from an exemplar \p host_vector.
-     *  \param v The \p host_vector to copy.
-     */
-    __host__
-    host_vector(const host_vector &v)
-      :Parent(v) {}
-
-    /*! Assign operator copies from an exemplar \p host_vector.
-     *  \param v The \p host_vector to copy.
-     */
-    __host__
-    host_vector &operator=(const host_vector &v)
-    { Parent::operator=(v); return *this; }
-
-    /*! Copy constructor copies from an exemplar \p host_vector with different type.
-     *  \param v The \p host_vector to copy.
-     */
-    template<typename OtherT, typename OtherAlloc>
-    __host__
-    host_vector(const host_vector<OtherT,OtherAlloc> &v)
-      :Parent(v) {}
-
-    /*! Assign operator copies from an exemplar \p host_vector with different type.
-     *  \param v The \p host_vector to copy.
-     */
-    template<typename OtherT, typename OtherAlloc>
-    __host__
-    host_vector &operator=(const host_vector<OtherT,OtherAlloc> &v)
-    { Parent::operator=(v); return *this; }
-
-    /*! Copy constructor copies from an exemplar <tt>std::vector</tt>.
-     *  \param v The <tt>std::vector</tt> to copy.
-     */
-    template<typename OtherT, typename OtherAlloc>
-    __host__
-    host_vector(const std::vector<OtherT,OtherAlloc> &v)
-      :Parent(v) {}
-
-    /*! Assign operator copies from an exemplar <tt>std::vector</tt>.
-     *  \param v The <tt>std::vector</tt> to copy.
-     */
-    template<typename OtherT, typename OtherAlloc>
-    __host__
-    host_vector &operator=(const std::vector<OtherT,OtherAlloc> &v)
-    { Parent::operator=(v); return *this;}
-
-    /*! Copy constructor copies from an exemplar \p device_vector with possibly different type.
-     *  \param v The \p device_vector to copy.
-     */
-    template<typename OtherT, typename OtherAlloc>
-    __host__
-    host_vector(const device_vector<OtherT,OtherAlloc> &v);
-
-    /*! Assign operator copies from an exemplar \p device_vector.
-     *  \param v The \p device_vector to copy.
-     */
-    template<typename OtherT, typename OtherAlloc>
-    __host__
-    host_vector &operator=(const device_vector<OtherT,OtherAlloc> &v)
-    { Parent::operator=(v); return *this; }
-
-    /*! This constructor builds a \p host_vector from a range.
-     *  \param first The beginning of the range.
-     *  \param last The end of the range.
-     */
-    template<typename InputIterator>
-    __host__
-    host_vector(InputIterator first, InputIterator last)
-      :Parent(first, last) {}
-
-// declare these members for the purpose of Doxygenating them
-// they actually exist in a derived-from class
-#if 0
-    /*! \brief Resizes this vector to the specified number of elements.
-     *  \param new_size Number of elements this vector should contain.
-     *  \param x Data with which new elements should be populated.
-     *  \throw std::length_error If n exceeds max_size().
-     *
-     *  This method will resize this vector to the specified number of
-     *  elements.  If the number is smaller than this vector's current
-     *  size this vector is truncated, otherwise this vector is
-     *  extended and new elements are populated with given data.
-     */
-    void resize(size_type new_size, const value_type &x = value_type());
-
-    /*! Returns the number of elements in this vector.
-     */
-    size_type size(void) const;
-
-    /*! Returns the size() of the largest possible vector.
-     *  \return The largest possible return value of size().
-     */
-    size_type max_size(void) const;
-
-    /*! \brief If n is less than or equal to capacity(), this call has no effect.
-     *         Otherwise, this method is a request for allocation of additional memory. If
-     *         the request is successful, then capacity() is greater than or equal to
-     *         n; otherwise, capacity() is unchanged. In either case, size() is unchanged.
-     *  \throw std::length_error If n exceeds max_size().
-     */
-    void reserve(size_type n);
-
-    /*! Returns the number of elements which have been reserved in this
-     *  vector.
-     */
-    size_type capacity(void) const;
-
-    /*! This method shrinks the capacity of this vector to exactly
-     *  fit its elements.
-     */
-    void shrink_to_fit(void);
-
-    /*! \brief Subscript access to the data contained in this vector_dev.
-     *  \param n The index of the element for which data should be accessed.
-     *  \return Read/write reference to data.
-     *
-     *  This operator allows for easy, array-style, data access.
-     *  Note that data access with this operator is unchecked and
-     *  out_of_range lookups are not defined.
-     */
-    reference operator[](size_type n);
-
-    /*! \brief Subscript read access to the data contained in this vector_dev.
-     *  \param n The index of the element for which data should be accessed.
-     *  \return Read reference to data.
-     *
-     *  This operator allows for easy, array-style, data access.
-     *  Note that data access with this operator is unchecked and
-     *  out_of_range lookups are not defined.
-     */
-    const_reference operator[](size_type n) const;
-
-    /*! This method returns an iterator pointing to the beginning of
-     *  this vector.
-     *  \return mStart
-     */
-    iterator begin(void);
-
-    /*! This method returns a const_iterator pointing to the beginning
-     *  of this vector.
-     *  \return mStart
-     */
-    const_iterator begin(void) const;
-
-    /*! This method returns a const_iterator pointing to the beginning
-     *  of this vector.
-     *  \return mStart
-     */
-    const_iterator cbegin(void) const;
-
-    /*! This method returns a reverse_iterator pointing to the beginning of
-     *  this vector's reversed sequence.
-     *  \return A reverse_iterator pointing to the beginning of this
-     *          vector's reversed sequence.
-     */
-    reverse_iterator rbegin(void);
-
-    /*! This method returns a const_reverse_iterator pointing to the beginning of
-     *  this vector's reversed sequence.
-     *  \return A const_reverse_iterator pointing to the beginning of this
-     *          vector's reversed sequence.
-     */
-    const_reverse_iterator rbegin(void) const;
-
-    /*! This method returns a const_reverse_iterator pointing to the beginning of
-     *  this vector's reversed sequence.
-     *  \return A const_reverse_iterator pointing to the beginning of this
-     *          vector's reversed sequence.
-     */
-    const_reverse_iterator crbegin(void) const;
-
-    /*! This method returns an iterator pointing to one element past the
-     *  last of this vector.
-     *  \return begin() + size().
-     */
-    iterator end(void);
-
-    /*! This method returns a const_iterator pointing to one element past the
-     *  last of this vector.
-     *  \return begin() + size().
-     */
-    const_iterator end(void) const;
-
-    /*! This method returns a const_iterator pointing to one element past the
-     *  last of this vector.
-     *  \return begin() + size().
-     */
-    const_iterator cend(void) const;
-
-    /*! This method returns a reverse_iterator pointing to one element past the
-     *  last of this vector's reversed sequence.
-     *  \return rbegin() + size().
-     */
-    reverse_iterator rend(void);
-
-    /*! This method returns a const_reverse_iterator pointing to one element past the
-     *  last of this vector's reversed sequence.
-     *  \return rbegin() + size().
-     */
-    const_reverse_iterator rend(void) const;
-
-    /*! This method returns a const_reverse_iterator pointing to one element past the
-     *  last of this vector's reversed sequence.
-     *  \return rbegin() + size().
-     */
-    const_reverse_iterator crend(void) const;
-
-    /*! This method returns a const_reference referring to the first element of this
-     *  vector.
-     *  \return The first element of this vector.
-     */
-    const_reference front(void) const;
-
-    /*! This method returns a reference pointing to the first element of this
-     *  vector.
-     *  \return The first element of this vector.
-     */
-    reference front(void);
-
-    /*! This method returns a const reference pointing to the last element of
-     *  this vector.
-     *  \return The last element of this vector.
-     */
-    const_reference back(void) const;
-
-    /*! This method returns a reference referring to the last element of
-     *  this vector_dev.
-     *  \return The last element of this vector.
-     */
-    reference back(void);
-
-    /*! This method returns a pointer to this vector's first element.
-     *  \return A pointer to the first element of this vector.
-     */
-    pointer data(void);
-
-    /*! This method returns a const_pointer to this vector's first element.
-     *  \return a const_pointer to the first element of this vector.
-     */
-    const_pointer data(void) const;
-
-    /*! This method resizes this vector to 0.
-     */
-    void clear(void);
-
-    /*! This method returns true iff size() == 0.
-     *  \return true if size() == 0; false, otherwise.
-     */
-    bool empty(void) const;
-
-    /*! This method appends the given element to the end of this vector.
-     *  \param x The element to append.
-     */
-    void push_back(const value_type &x);
-
-    /*! This method erases the last element of this vector, invalidating
-     *  all iterators and references to it.
-     */
-    void pop_back(void);
-
-    /*! This method swaps the contents of this vector_base with another vector.
-     *  \param v The vector with which to swap.
-     */
-    void swap(host_vector &v);
-
-    /*! This method removes the element at position pos.
-     *  \param pos The position of the element of interest.
-     *  \return An iterator pointing to the new location of the element that followed the element
-     *          at position pos.
-     */
-    iterator erase(iterator pos);
-
-    /*! This method removes the range of elements [first,last) from this vector.
-     *  \param first The beginning of the range of elements to remove.
-     *  \param last The end of the range of elements to remove.
-     *  \return An iterator pointing to the new location of the element that followed the last
-     *          element in the sequence [first,last).
-     */
-    iterator erase(iterator first, iterator last);
-
-    /*! This method inserts a single copy of a given exemplar value at the
-     *  specified position in this vector.
-     *  \param position The insertion position.
-     *  \param x The exemplar element to copy & insert.
-     *  \return An iterator pointing to the newly inserted element.
-     */
-    iterator insert(iterator position, const T &x); 
-
-    /*! This method inserts a copy of an exemplar value to a range at the
-     *  specified position in this vector.
-     *  \param position The insertion position
-     *  \param n The number of insertions to perform.
-     *  \param x The value to replicate and insert.
-     */
-    void insert(iterator position, size_type n, const T &x);
-
-    /*! This method inserts a copy of an input range at the specified position
-     *  in this vector.
-     *  \param position The insertion position.
-     *  \param first The beginning of the range to copy.
-     *  \param last  The end of the range to copy.
-     *
-     *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html>Input Iterator</a>,
-     *                        and \p InputIterator's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/Assignable.html">Assignable</a>.
-     */
-    template<typename InputIterator>
-    void insert(iterator position, InputIterator first, InputIterator last);
-
-    /*! This version of \p assign replicates a given exemplar
-     *  \p n times into this vector.
-     *  \param n The number of times to copy \p x.
-     *  \param x The exemplar element to replicate.
-     */
-    void assign(size_type n, const T &x);
-
-    /*! This version of \p assign makes this vector a copy of a given input range.
-     *  \param first The beginning of the range to copy.
-     *  \param last  The end of the range to copy.
-     *
-     *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator">Input Iterator</a>.
-     */
-    template<typename InputIterator>
-    void assign(InputIterator first, InputIterator last);
-
-    /*! This method returns a copy of this vector's allocator.
-     *  \return A copy of the alloctor used by this vector.
-     */
-    allocator_type get_allocator(void) const;
-#endif // end doxygen-only members
-}; // end host_vector
-
-/*! \}
- */
-
-} // end thrust
-
-#include <thrust/detail/host_vector.inl>
-
diff --git a/compat/thrust/inner_product.h b/compat/thrust/inner_product.h
deleted file mode 100644
index 01f55414bd..0000000000
--- a/compat/thrust/inner_product.h
+++ /dev/null
@@ -1,262 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file inner_product.h
- *  \brief Mathematical inner product between ranges
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/execution_policy.h>
-
-namespace thrust
-{
-
-
-/*! \addtogroup reductions
- *  \{
- *  \addtogroup transformed_reductions Transformed Reductions
- *  \ingroup reductions
- *  \{
- */
-
-
-/*! \p inner_product calculates an inner product of the ranges
- *  <tt>[first1, last1)</tt> and <tt>[first2, first2 + (last1 - first1))</tt>.
- *
- *  Specifically, this version of \p inner_product computes the sum
- *  <tt>init + (*first1 * *first2) + (*(first1+1) * *(first2+1)) + ... </tt>
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first1 The beginning of the first sequence.
- *  \param last1 The end of the first sequence.
- *  \param first2 The beginning of the second sequence.
- *  \param init Initial value of the result.
- *  \return The inner product of sequences <tt>[first1, last1)</tt>
- *          and <tt>[first2, last2)</tt> plus \p init.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *  \tparam OutputType is a model of <a href="http://www.sgi.com/tech/stl/Assignable.html">Assignable</a>,
- *          and if \c x is an object of type \p OutputType, and \c y is an object of \p InputIterator1's \c value_type,
- *          and \c z is an object of \p InputIterator2's \c value_type, then <tt>x + y * z</tt> is defined
- *          and is convertible to \p OutputType.
- *
- *  The following code demonstrates how to use \p inner_product to
- *  compute the dot product of two vectors using the \p thrust::host execution policy for parallelization.
- *
- *  \code
- *  #include <thrust/inner_product.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  float vec1[3] = {1.0f, 2.0f, 5.0f};
- *  float vec2[3] = {4.0f, 1.0f, 5.0f};
- *
- *  float result = thrust::inner_product(thrust::host, vec1, vec1 + 3, vec2, 0.0f);
- *
- *  // result == 31.0f
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/inner_product.html
- */
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputType>
-OutputType inner_product(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                         InputIterator1 first1,
-                         InputIterator1 last1,
-                         InputIterator2 first2,
-                         OutputType init);
-
-
-/*! \p inner_product calculates an inner product of the ranges
- *  <tt>[first1, last1)</tt> and <tt>[first2, first2 + (last1 - first1))</tt>.
- *
- *  Specifically, this version of \p inner_product computes the sum
- *  <tt>init + (*first1 * *first2) + (*(first1+1) * *(first2+1)) + ... </tt>
- *
- *  Unlike the C++ Standard Template Library function <tt>std::inner_product</tt>,
- *  this version offers no guarantee on order of execution.
- *
- *  \param first1 The beginning of the first sequence.
- *  \param last1 The end of the first sequence.
- *  \param first2 The beginning of the second sequence.
- *  \param init Initial value of the result.
- *  \return The inner product of sequences <tt>[first1, last1)</tt>
- *          and <tt>[first2, last2)</tt> plus \p init.
- *
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *  \tparam OutputType is a model of <a href="http://www.sgi.com/tech/stl/Assignable.html">Assignable</a>,
- *          and if \c x is an object of type \p OutputType, and \c y is an object of \p InputIterator1's \c value_type,
- *          and \c z is an object of \p InputIterator2's \c value_type, then <tt>x + y * z</tt> is defined
- *          and is convertible to \p OutputType.
- *
- *  The following code demonstrates how to use \p inner_product to
- *  compute the dot product of two vectors.
- *
- *  \code
- *  #include <thrust/inner_product.h>
- *  ...
- *  float vec1[3] = {1.0f, 2.0f, 5.0f};
- *  float vec2[3] = {4.0f, 1.0f, 5.0f};
- *
- *  float result = thrust::inner_product(vec1, vec1 + 3, vec2, 0.0f);
- *
- *  // result == 31.0f
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/inner_product.html
- */
-template <typename InputIterator1, typename InputIterator2, typename OutputType>
-OutputType inner_product(InputIterator1 first1, InputIterator1 last1,
-                         InputIterator2 first2, OutputType init);
-
-
-/*! \p inner_product calculates an inner product of the ranges
- *  <tt>[first1, last1)</tt> and <tt>[first2, first2 + (last1 - first1))</tt>.
- *
- *  This version of \p inner_product is identical to the first, except that is uses
- *  two user-supplied function objects instead of \c operator+ and \c operator*.
- *
- *  Specifically, this version of \p inner_product computes the sum
- *  <tt>binary_op1( init, binary_op2(*first1, *first2) ), ... </tt>
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first1 The beginning of the first sequence.
- *  \param last1 The end of the first sequence.
- *  \param first2 The beginning of the second sequence.
- *  \param init Initial value of the result.
- *  \param binary_op1 Generalized addition operation.
- *  \param binary_op2 Generalized multiplication operation.
- *  \return The inner product of sequences <tt>[first1, last1)</tt> and <tt>[first2, last2)</tt>.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator1's \c value_type is convertible to \p BinaryFunction2's \c first_argument_type.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>.
- *          and \p InputIterator2's \c value_type is convertible to \p BinaryFunction2's \c second_argument_type.
- *  \tparam OutputType is a model of <a href="http://www.sgi.com/tech/stl/Assignable.html">Assignable</a>,
- *          and \p OutputType is convertible to \p BinaryFunction1's \c first_argument_type.
- *  \tparam BinaryFunction1 is a model of <a href="http://www.sgi.com/tech/stl/BinaryFunction.html">Binary Function</a>,
- *          and \p BinaryFunction1's \c return_type is convertible to \p OutputType.
- *  \tparam BinaryFunction2 is a model of <a href="http://www.sgi.com/tech/stl/BinaryFunction.html">Binary Function</a>,
- *          and \p BinaryFunction2's \c return_type is convertible to \p BinaryFunction1's \c second_argument_type.
- * 
- *  \code
- *  #include <thrust/inner_product.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  float vec1[3] = {1.0f, 2.0f, 5.0f};
- *  float vec2[3] = {4.0f, 1.0f, 5.0f};
- *
- *  float init = 0.0f;
- *  thrust::plus<float>       binary_op1;
- *  thrust::multiplies<float> binary_op2;
- *
- *  float result = thrust::inner_product(thrust::host, vec1, vec1 + 3, vec2, init, binary_op1, binary_op2);
- *
- *  // result == 31.0f
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/inner_product.html
- */
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputType,
-         typename BinaryFunction1,
-         typename BinaryFunction2>
-OutputType inner_product(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                         InputIterator1 first1,
-                         InputIterator1 last1,
-                         InputIterator2 first2,
-                         OutputType init, 
-                         BinaryFunction1 binary_op1,
-                         BinaryFunction2 binary_op2);
-
-
-/*! \p inner_product calculates an inner product of the ranges
- *  <tt>[first1, last1)</tt> and <tt>[first2, first2 + (last1 - first1))</tt>.
- *
- *  This version of \p inner_product is identical to the first, except that is uses
- *  two user-supplied function objects instead of \c operator+ and \c operator*.
- *
- *  Specifically, this version of \p inner_product computes the sum
- *  <tt>binary_op1( init, binary_op2(*first1, *first2) ), ... </tt>
- *
- *  Unlike the C++ Standard Template Library function <tt>std::inner_product</tt>,
- *  this version offers no guarantee on order of execution.
- *
- *  \param first1 The beginning of the first sequence.
- *  \param last1 The end of the first sequence.
- *  \param first2 The beginning of the second sequence.
- *  \param init Initial value of the result.
- *  \param binary_op1 Generalized addition operation.
- *  \param binary_op2 Generalized multiplication operation.
- *  \return The inner product of sequences <tt>[first1, last1)</tt> and <tt>[first2, last2)</tt>.
- *
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator1's \c value_type is convertible to \p BinaryFunction2's \c first_argument_type.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>.
- *          and \p InputIterator2's \c value_type is convertible to \p BinaryFunction2's \c second_argument_type.
- *  \tparam OutputType is a model of <a href="http://www.sgi.com/tech/stl/Assignable.html">Assignable</a>,
- *          and \p OutputType is convertible to \p BinaryFunction1's \c first_argument_type.
- *  \tparam BinaryFunction1 is a model of <a href="http://www.sgi.com/tech/stl/BinaryFunction.html">Binary Function</a>,
- *          and \p BinaryFunction1's \c return_type is convertible to \p OutputType.
- *  \tparam BinaryFunction2 is a model of <a href="http://www.sgi.com/tech/stl/BinaryFunction.html">Binary Function</a>,
- *          and \p BinaryFunction2's \c return_type is convertible to \p BinaryFunction1's \c second_argument_type.
- * 
- *  \code
- *  #include <thrust/inner_product.h>
- *  ...
- *  float vec1[3] = {1.0f, 2.0f, 5.0f};
- *  float vec2[3] = {4.0f, 1.0f, 5.0f};
- *
- *  float init = 0.0f;
- *  thrust::plus<float>       binary_op1;
- *  thrust::multiplies<float> binary_op2;
- *
- *  float result = thrust::inner_product(vec1, vec1 + 3, vec2, init, binary_op1, binary_op2);
- *
- *  // result == 31.0f
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/inner_product.html
- */
-template <typename InputIterator1, typename InputIterator2, typename OutputType,
-          typename BinaryFunction1, typename BinaryFunction2>
-OutputType inner_product(InputIterator1 first1, InputIterator1 last1,
-                         InputIterator2 first2, OutputType init, 
-                         BinaryFunction1 binary_op1, BinaryFunction2 binary_op2);
-
-
-/*! \} // end transformed_reductions
- *  \} // end reductions
- */
-
-} // end namespace thrust
-
-#include <thrust/detail/inner_product.inl>
-
diff --git a/compat/thrust/iterator/constant_iterator.h b/compat/thrust/iterator/constant_iterator.h
deleted file mode 100644
index e9e03c18c2..0000000000
--- a/compat/thrust/iterator/constant_iterator.h
+++ /dev/null
@@ -1,251 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file thrust/iterator/constant_iterator.h
- *  \brief An iterator which returns a constant value when
- *         dereferenced
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/iterator/detail/constant_iterator_base.h>
-#include <thrust/iterator/iterator_facade.h>
-
-namespace thrust
-{
-
-/*! \addtogroup iterators
- *  \{
- */
-
-/*! \addtogroup fancyiterator Fancy Iterators
- *  \ingroup iterators
- *  \{
- */
-
-/*! \p constant_iterator is an iterator which represents a pointer into a range
- *  of constant values. This iterator is useful for creating a range filled with the same
- *  value without explicitly storing it in memory. Using \p constant_iterator saves both
- *  memory capacity and bandwidth.
- *
- *  The following code snippet demonstrates how to create a \p constant_iterator whose
- *  \c value_type is \c int and whose value is \c 10.
- *
- *  \code
- *  #include <thrust/iterator/constant_iterator.h>
- *
- *  thrust::constant_iterator<int> iter(10);
- *
- *  *iter;    // returns 10
- *  iter[0];  // returns 10
- *  iter[1];  // returns 10
- *  iter[13]; // returns 10
- *
- *  // and so on...
- *  \endcode
- *
- *  This next example demonstrates how to use a \p constant_iterator with the
- *  \p thrust::transform function to increment all elements of a sequence by the
- *  same value. We will create a temporary \p constant_iterator with the function
- *  \p make_constant_iterator function in order to avoid explicitly specifying
- *  its type:
- *
- *  \code
- *  #include <thrust/iterator/constant_iterator.h>
- *  #include <thrust/transform.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/device_vector.h>
- *
- *  int main(void)
- *  {
- *    thrust::device_vector<int> data(4);
- *    data[0] = 3;
- *    data[1] = 7;
- *    data[2] = 2;
- *    data[3] = 5;
- *    
- *    // add 10 to all values in data
- *    thrust::transform(data.begin(), data.end(),
- *                      thrust::make_constant_iterator(10),
- *                      data.begin(),
- *                      thrust::plus<int>());
- *    
- *    // data is now [13, 17, 12, 15]
- *    
- *    return 0;
- *  }
- *  \endcode
- *
- *  \see make_constant_iterator
- */
-template<typename Value,
-         typename Incrementable = use_default,
-         typename System = use_default>
-  class constant_iterator
-    : public detail::constant_iterator_base<Value, Incrementable, System>::type
-{
-    /*! \cond
-     */
-    friend class thrust::iterator_core_access;
-    typedef typename detail::constant_iterator_base<Value, Incrementable, System>::type          super_t;
-    typedef typename detail::constant_iterator_base<Value, Incrementable, System>::incrementable incrementable;
-    typedef typename detail::constant_iterator_base<Value, Incrementable, System>::base_iterator base_iterator;
-
-  public:
-    typedef typename super_t::reference  reference;
-    typedef typename super_t::value_type value_type;
-
-    /*! \endcond
-     */
-
-    /*! Null constructor initializes this \p constant_iterator's constant using its
-     *  null constructor.
-     */
-    __host__ __device__
-    constant_iterator(void)
-      : super_t(), m_value(){};
-
-    /*! Copy constructor copies the value of another \p constant_iterator into this
-     *  \p constant_iterator.
-     *
-     *  \p rhs The constant_iterator to copy.
-     */
-    __host__ __device__
-    constant_iterator(constant_iterator const &rhs)
-      : super_t(rhs.base()), m_value(rhs.m_value) {}
-
-    /*! Copy constructor copies the value of another \p constant_iterator with related
-     *  System type.
-     *
-     *  \param rhs The \p constant_iterator to copy.
-     */
-    template<typename OtherSystem>
-    __host__ __device__
-    constant_iterator(constant_iterator<Value,Incrementable,OtherSystem> const &rhs,
-                      typename thrust::detail::enable_if_convertible<
-                        typename thrust::iterator_system<constant_iterator<Value,Incrementable,OtherSystem> >::type,
-                        typename thrust::iterator_system<super_t>::type
-                      >::type * = 0)
-      : super_t(rhs.base()), m_value(rhs.value()) {}
-
-    /*! This constructor receives a value to use as the constant value of this
-     *  \p constant_iterator and an index specifying the location of this
-     *  \p constant_iterator in a sequence.
-     *  
-     *  \p v The value of this \p constant_iterator's constant value.
-     *  \p i The index of this \p constant_iterator in a sequence. Defaults to the
-     *       value returned by \c Incrementable's null constructor. For example,
-     *       when <tt>Incrementable == int</tt>, \c 0.
-     */
-    __host__ __device__
-    constant_iterator(value_type const& v, incrementable const &i = incrementable())
-      : super_t(base_iterator(i)), m_value(v) {}
-
-    /*! This constructor is templated to allow construction from a value type and
-     *  incrementable type related this this \p constant_iterator's respective types.
-     *
-     *  \p v The value of this \p constant_iterator's constant value.
-     *  \p i The index of this \p constant_iterator in a sequence. Defaults to the
-     *       value returned by \c Incrementable's null constructor. For example,
-     *       when <tt>Incrementable == int</tt>, \c 0.
-     */
-    template<typename OtherValue, typename OtherIncrementable>
-    __host__ __device__
-    constant_iterator(OtherValue const& v, OtherIncrementable const& i = incrementable())
-      : super_t(base_iterator(i)), m_value(v) {}
-
-    /*! This method returns the value of this \p constant_iterator's constant value.
-     *  \return A \c const reference to this \p constant_iterator's constant value.
-     */
-    __host__ __device__
-    Value const& value(void) const
-    { return m_value; }
-
-    /*! \cond
-     */
-
-  protected:
-    __host__ __device__
-    Value const& value_reference(void) const
-    { return m_value; }
-
-    __host__ __device__
-    Value & value_reference(void)
-    { return m_value; }
-  
-  private: // Core iterator interface
-    __host__ __device__
-    reference dereference(void) const
-    {
-      return m_value;
-    }
-
-  private:
-    Value m_value;
-
-    /*! \endcond
-     */
-}; // end constant_iterator
-
-
-/*! This version of \p make_constant_iterator creates a \p constant_iterator
- *  from values given for both value and index. The type of \p constant_iterator
- *  may be inferred by the compiler from the types of its parameters.
- *
- *  \param x The value of the returned \p constant_iterator's constant value.
- *  \param i The index of the returned \p constant_iterator within a sequence.
- *           The type of this parameter defaults to \c int. In the default case,
- *           the value of this parameter is \c 0.
- *
- *  \return A new \p constant_iterator with constant value & index as given
- *          by \p x & \p i.
- *
- *  \see constant_iterator
- */
-template<typename V, typename I>
-inline __host__ __device__
-constant_iterator<V,I> make_constant_iterator(V x, I i = int())
-{
-  return constant_iterator<V,I>(x, i);
-} // end make_constant_iterator()
-
-
-/*! This version of \p make_constant_iterator creates a \p constant_iterator
- *  using only a parameter for the desired constant value. The value of the
- *  returned \p constant_iterator's index is set to \c 0.
- *
- *  \param x The value of the returned \p constant_iterator's constant value.
- *  \return A new \p constant_iterator with constant value equal to \p x and
- *          index equal to \c 0.
- *  \see constant_iterator
- */
-template<typename V>
-inline __host__ __device__
-constant_iterator<V> make_constant_iterator(V x)
-{
-  return constant_iterator<V>(x, 0);
-} // end make_constant_iterator()
-
-/*! \} // end fancyiterators
- */
-
-/*! \} // end iterators
- */
-
-} // end namespace thrust
-
diff --git a/compat/thrust/iterator/counting_iterator.h b/compat/thrust/iterator/counting_iterator.h
deleted file mode 100644
index 99812cae17..0000000000
--- a/compat/thrust/iterator/counting_iterator.h
+++ /dev/null
@@ -1,243 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file thrust/iterator/counting_iterator.h
- *  \brief An iterator which returns an increasing incrementable value
- *         when dereferenced
- */
-
-/*
- * Copyright David Abrahams 2003.
- * 
- * Distributed under the Boost Software License, Version 1.0.
- * (See accompanying NOTICE file for the complete license)
- *
- * For more information, see http://www.boost.org
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/iterator/iterator_adaptor.h>
-#include <thrust/iterator/iterator_facade.h>
-#include <thrust/iterator/iterator_categories.h>
-
-// #include the details first
-#include <thrust/iterator/detail/counting_iterator.inl>
-
-namespace thrust
-{
-
-/*! \addtogroup iterators
- *  \{
- */
-
-/*! \addtogroup fancyiterator Fancy Iterators
- *  \ingroup iterators
- *  \{
- */
-
-/*! \p counting_iterator is an iterator which represents a pointer into a range
- *  of sequentially changing values. This iterator is useful for creating a range
- *  filled with a sequence without explicitly storing it in memory. Using
- *  \p counting_iterator saves memory capacity and bandwidth.
- *
- *  The following code snippet demonstrates how to create a \p counting_iterator whose
- *  \c value_type is \c int and which sequentially increments by \c 1.
- *
- *  \code
- *  #include <thrust/iterator/counting_iterator.h>
- *  ...
- *  // create iterators
- *  thrust::counting_iterator<int> first(10);
- *  thrust::counting_iterator<int> last = first + 3;
- *   
- *  first[0]   // returns 10
- *  first[1]   // returns 11
- *  first[100] // returns 110
- *   
- *  // sum of [first, last)
- *  thrust::reduce(first, last);   // returns 33 (i.e. 10 + 11 + 12)
- *   
- *  // initialize vector to [0,1,2,..]
- *  thrust::counting_iterator<int> iter(0);
- *  thrust::device_vector<int> vec(500);
- *  thrust::copy(iter, iter + vec.size(), vec.begin());
- *  \endcode
- *
- *  This next example demonstrates how to use a \p counting_iterator with the
- *  \p thrust::copy_if function to compute the indices of the non-zero elements
- *  of a \p device_vector. In this example, we use the \p make_counting_iterator
- *  function to avoid specifying the type of the \p counting_iterator.
- *
- *  \code
- *  #include <thrust/iterator/counting_iterator.h>
- *  #include <thrust/copy.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/device_vector.h>
- *   
- *  int main(void)
- *  {
- *   // this example computes indices for all the nonzero values in a sequence
- *   
- *   // sequence of zero and nonzero values
- *   thrust::device_vector<int> stencil(8);
- *   stencil[0] = 0;
- *   stencil[1] = 1;
- *   stencil[2] = 1;
- *   stencil[3] = 0;
- *   stencil[4] = 0;
- *   stencil[5] = 1;
- *   stencil[6] = 0;
- *   stencil[7] = 1;
- *   
- *   // storage for the nonzero indices
- *   thrust::device_vector<int> indices(8);
- *   
- *   // compute indices of nonzero elements
- *   typedef thrust::device_vector<int>::iterator IndexIterator;
- *   
- *   // use make_counting_iterator to define the sequence [0, 8)
- *   IndexIterator indices_end = thrust::copy_if(thrust::make_counting_iterator(0),
- *                                               thrust::make_counting_iterator(8),
- *                                               stencil.begin(),
- *                                               indices.begin(),
- *                                               thrust::identity<int>());
- *   // indices now contains [1,2,5,7]
- *   
- *   return 0;
- *  }
- *  \endcode
- *
- *  \see make_counting_iterator
- */
-template<typename Incrementable,
-         typename System = use_default,
-         typename Traversal = use_default,
-         typename Difference = use_default>
-  class counting_iterator
-    : public detail::counting_iterator_base<Incrementable, System, Traversal, Difference>::type
-{
-    /*! \cond
-     */
-    typedef typename detail::counting_iterator_base<Incrementable, System, Traversal, Difference>::type super_t;
-
-    friend class thrust::iterator_core_access;
-
-  public:
-    typedef typename super_t::reference       reference;
-    typedef typename super_t::difference_type difference_type;
-
-    /*! \endcond
-     */
-
-    /*! Null constructor initializes this \p counting_iterator's \c Incrementable
-     *  counter using its null constructor.
-     */
-    __host__ __device__
-    counting_iterator(void){};
-
-    /*! Copy constructor copies the value of another \p counting_iterator into a
-     *  new \p counting_iterator.
-     *
-     *  \p rhs The \p counting_iterator to copy.
-     */
-    __host__ __device__
-    counting_iterator(counting_iterator const &rhs):super_t(rhs.base()){}
-
-    /*! Copy constructor copies the value of another counting_iterator 
-     *  with related System type.
-     *
-     *  \param rhs The \p counting_iterator to copy.
-     */
-    template<typename OtherSystem>
-    __host__ __device__
-    counting_iterator(counting_iterator<Incrementable, OtherSystem, Traversal, Difference> const &rhs,
-                      typename thrust::detail::enable_if_convertible<
-                        typename thrust::iterator_system<counting_iterator<Incrementable,OtherSystem,Traversal,Difference> >::type,
-                        typename thrust::iterator_system<super_t>::type
-                      >::type * = 0)
-      : super_t(rhs.base()){}
-
-    /*! This \c explicit constructor copies the value of an \c Incrementable
-     *  into a new \p counting_iterator's \c Incrementable counter.
-     *  
-     *  \param x The initial value of the new \p counting_iterator's \c Incrementable
-     *         counter.
-     */
-    __host__ __device__
-    explicit counting_iterator(Incrementable x):super_t(x){}
-
-    /*! \cond
-     */
-  private:
-    __host__ __device__
-    reference dereference(void) const
-    {
-      return this->base_reference();
-    }
-
-    // note that we implement equal specially for floating point counting_iterator
-    template <typename OtherIncrementable, typename OtherSystem, typename OtherTraversal, typename OtherDifference>
-    __host__ __device__
-    bool equal(counting_iterator<OtherIncrementable, OtherSystem, OtherTraversal, OtherDifference> const& y) const
-    {
-      typedef thrust::detail::counting_iterator_equal<difference_type,Incrementable,OtherIncrementable> e;
-      return e::equal(this->base(), y.base());
-    }
-
-    template <class OtherIncrementable>
-    __host__ __device__
-    difference_type
-    distance_to(counting_iterator<OtherIncrementable, System, Traversal, Difference> const& y) const
-    {
-      typedef typename
-      thrust::detail::eval_if<
-        thrust::detail::is_numeric<Incrementable>::value,
-        thrust::detail::identity_<thrust::detail::number_distance<difference_type, Incrementable, OtherIncrementable> >,
-        thrust::detail::identity_<thrust::detail::iterator_distance<difference_type, Incrementable, OtherIncrementable> >
-      >::type d;
-
-      return d::distance(this->base(), y.base());
-    }
-
-    /*! \endcond
-     */
-}; // end counting_iterator
-
-
-/*! \p make_counting_iterator creates a \p counting_iterator
- *  using an initial value for its \c Incrementable counter.
- *
- *  \param x The initial value of the new \p counting_iterator's counter.
- *  \return A new \p counting_iterator whose counter has been initialized to \p x.
- */
-template <typename Incrementable>
-inline __host__ __device__
-counting_iterator<Incrementable> make_counting_iterator(Incrementable x)
-{
-  return counting_iterator<Incrementable>(x);
-}
-
-/*! \} // end fancyiterators
- */
-
-/*! \} // end iterators
- */
-
-} // end thrust
-
diff --git a/compat/thrust/iterator/detail/any_assign.h b/compat/thrust/iterator/detail/any_assign.h
deleted file mode 100644
index e08a829ec0..0000000000
--- a/compat/thrust/iterator/detail/any_assign.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-namespace thrust
-{
-namespace detail
-{
-
-
-// a type which may be assigned any other type
-struct any_assign
-{
-  inline __host__ __device__ any_assign(void)
-  {}
-
-  template<typename T>
-  inline __host__ __device__ any_assign(T)
-  {}
-
-  template<typename T>
-  inline __host__ __device__
-  any_assign &operator=(T)
-  {
-    if(0)
-    {
-      // trick the compiler into silencing "warning: this expression has no effect"
-      int *x = 0;
-      *x = 13;
-    } // end if
-
-    return *this;
-  }
-};
-
-
-} // end detail
-} // end thrust
-
diff --git a/compat/thrust/iterator/detail/any_system_tag.h b/compat/thrust/iterator/detail/any_system_tag.h
deleted file mode 100644
index fc6417ad8a..0000000000
--- a/compat/thrust/iterator/detail/any_system_tag.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/execution_policy.h>
-
-namespace thrust
-{
-
-struct any_system_tag
-  : thrust::execution_policy<any_system_tag>
-{
-  // allow any_system_tag to convert to any type at all
-  // XXX make this safer using enable_if<is_tag<T>> upon c++11
-  template<typename T> operator T () const {return T();}
-};
-
-// TODO remove this in 1.7.0
-typedef THRUST_DEPRECATED any_system_tag any_space_tag;
-
-} // end thrust
-
diff --git a/compat/thrust/iterator/detail/constant_iterator_base.h b/compat/thrust/iterator/detail/constant_iterator_base.h
deleted file mode 100644
index 276e5ff0ef..0000000000
--- a/compat/thrust/iterator/detail/constant_iterator_base.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/iterator/counting_iterator.h>
-#include <thrust/iterator/iterator_adaptor.h>
-
-namespace thrust
-{
-
-// forward declaration of constant_iterator
-template<typename,typename,typename> class constant_iterator;
-
-namespace detail
-{
-
-template<typename Value,
-         typename Incrementable,
-         typename System>
-  struct constant_iterator_base
-{
-  typedef Value              value_type;
-
-  // the reference type is the same as the value_type.
-  // we wish to avoid returning a reference to the internal state
-  // of the constant_iterator, which is prone to subtle bugs.
-  // consider the temporary iterator created in the expression
-  // *(iter + i)
-  typedef value_type         reference;
-
-  // the incrementable type is int unless otherwise specified
-  typedef typename thrust::detail::ia_dflt_help<
-    Incrementable,
-    thrust::detail::identity_<int>
-  >::type incrementable;
-
-  typedef typename thrust::counting_iterator<
-    incrementable,
-    System,
-    thrust::random_access_traversal_tag
-  > base_iterator;
-
-  typedef typename thrust::iterator_adaptor<
-    constant_iterator<Value, Incrementable, System>,
-    base_iterator,
-    value_type, // XXX we may need to pass const value_type here as boost counting_iterator does
-    typename thrust::iterator_system<base_iterator>::type,
-    typename thrust::iterator_traversal<base_iterator>::type,
-    reference
-  > type;
-}; // end constant_iterator_base
-
-} // end detail
-  
-} // end thrust
-
diff --git a/compat/thrust/iterator/detail/counting_iterator.inl b/compat/thrust/iterator/detail/counting_iterator.inl
deleted file mode 100644
index ad4fcffaa6..0000000000
--- a/compat/thrust/iterator/detail/counting_iterator.inl
+++ /dev/null
@@ -1,141 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/iterator/counting_iterator.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/detail/numeric_traits.h>
-#include <thrust/detail/type_traits.h>
-#include <cstddef>
-
-namespace thrust
-{
-
-// forward declaration of counting_iterator
-template <typename Incrementable, typename System, typename Traversal, typename Difference>
-  class counting_iterator;
-
-namespace detail
-{
-
-template <typename Incrementable, typename System, typename Traversal, typename Difference>
-  struct counting_iterator_base
-{
-  typedef typename thrust::detail::eval_if<
-    // use any_system_tag if we are given use_default
-    thrust::detail::is_same<System,use_default>::value,
-    thrust::detail::identity_<thrust::any_system_tag>,
-    thrust::detail::identity_<System>
-  >::type system;
-
-  typedef typename thrust::detail::ia_dflt_help<
-      Traversal,
-      thrust::detail::eval_if<
-          thrust::detail::is_numeric<Incrementable>::value,
-          thrust::detail::identity_<random_access_traversal_tag>,
-          thrust::iterator_traversal<Incrementable>
-      >
-  >::type traversal;
-
-  // unlike Boost, we explicitly use std::ptrdiff_t as the difference type
-  // for floating point counting_iterators
-  typedef typename thrust::detail::ia_dflt_help<
-    Difference,
-    thrust::detail::eval_if<
-      thrust::detail::is_numeric<Incrementable>::value,
-        thrust::detail::eval_if<
-          thrust::detail::is_integral<Incrementable>::value,
-          thrust::detail::numeric_difference<Incrementable>,
-          thrust::detail::identity_<std::ptrdiff_t>
-        >,
-      thrust::iterator_difference<Incrementable>
-    >
-  >::type difference;
-
-  // our implementation departs from Boost's in that counting_iterator::dereference
-  // returns a copy of its counter, rather than a reference to it. returning a reference
-  // to the internal state of an iterator causes subtle bugs (consider the temporary
-  // iterator created in the expression *(iter + i) ) and has no compelling use case
-  typedef thrust::iterator_adaptor<
-    counting_iterator<Incrementable, System, Traversal, Difference>, // self
-    Incrementable,                                                  // Base
-    Incrementable,                                                  // XXX we may need to pass const here as Boost does
-    system,
-    traversal,
-    Incrementable,
-    difference
-  > type;
-}; // end counting_iterator_base
-
-
-template<typename Difference, typename Incrementable1, typename Incrementable2>
-  struct iterator_distance
-{
-  __host__ __device__
-  static Difference distance(Incrementable1 x, Incrementable2 y)
-  {
-    return y - x;
-  }
-};
-
-
-template<typename Difference, typename Incrementable1, typename Incrementable2>
-  struct number_distance
-{
-  __host__ __device__
-  static Difference distance(Incrementable1 x, Incrementable2 y)
-  {
-      return static_cast<Difference>(numeric_distance(x,y));
-  }
-};
-
-
-template<typename Difference, typename Incrementable1, typename Incrementable2, typename Enable = void>
-  struct counting_iterator_equal
-{
-  __host__ __device__
-  static bool equal(Incrementable1 x, Incrementable2 y)
-  {
-    return x == y;
-  }
-};
-
-
-// specialization for floating point equality
-template<typename Difference, typename Incrementable1, typename Incrementable2>
-  struct counting_iterator_equal<
-    Difference,
-    Incrementable1,
-    Incrementable2,
-    typename thrust::detail::enable_if<
-      thrust::detail::is_floating_point<Incrementable1>::value ||
-      thrust::detail::is_floating_point<Incrementable2>::value
-    >::type
-  >
-{
-  __host__ __device__
-  static bool equal(Incrementable1 x, Incrementable2 y)
-  {
-    typedef number_distance<Difference,Incrementable1,Incrementable2> d;
-    return d::distance(x,y) == 0;
-  }
-};
-
-
-} // end detail
-} // end thrust
-
diff --git a/compat/thrust/iterator/detail/device_system_tag.h b/compat/thrust/iterator/detail/device_system_tag.h
deleted file mode 100644
index ab66fb48bf..0000000000
--- a/compat/thrust/iterator/detail/device_system_tag.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// #include the device system's execution_policy header
-#define __THRUST_DEVICE_SYSTEM_TAG_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/execution_policy.h>
-#include __THRUST_DEVICE_SYSTEM_TAG_HEADER
-#undef __THRUST_DEVICE_SYSTEM_TAG_HEADER
-
-namespace thrust
-{
-
-typedef thrust::system::__THRUST_DEVICE_SYSTEM_NAMESPACE::tag device_system_tag;
-
-} // end thrust
-
-// TODO remove this in 1.8.0
-namespace thrust
-{
-
-typedef THRUST_DEPRECATED device_system_tag device_space_tag;
-
-} // end thrust
-
diff --git a/compat/thrust/iterator/detail/discard_iterator_base.h b/compat/thrust/iterator/detail/discard_iterator_base.h
deleted file mode 100644
index 1909ca8239..0000000000
--- a/compat/thrust/iterator/detail/discard_iterator_base.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/iterator/counting_iterator.h>
-#include <thrust/iterator/iterator_adaptor.h>
-#include <thrust/iterator/detail/any_assign.h>
-#include <cstddef> // for std::ptrdiff_t
-
-namespace thrust
-{
-
-// forward declaration of discard_iterator
-template<typename> class discard_iterator;
-
-namespace detail
-{
-
-
-template<typename System>
-  struct discard_iterator_base
-{
-  // XXX value_type should actually be void
-  //     but this interferes with zip_iterator<discard_iterator>
-  typedef any_assign         value_type;
-  typedef any_assign&        reference;
-  typedef std::ptrdiff_t     incrementable;
-
-  typedef typename thrust::counting_iterator<
-    incrementable,
-    System,
-    thrust::random_access_traversal_tag
-  > base_iterator;
-
-  typedef typename thrust::iterator_adaptor<
-    discard_iterator<System>,
-    base_iterator,
-    value_type,
-    typename thrust::iterator_system<base_iterator>::type,
-    typename thrust::iterator_traversal<base_iterator>::type,
-    reference
-  > type;
-}; // end discard_iterator_base
-
-
-} // end detail
-  
-} // end thrust
-
-
diff --git a/compat/thrust/iterator/detail/distance_from_result.h b/compat/thrust/iterator/detail/distance_from_result.h
deleted file mode 100644
index bf83e6ca44..0000000000
--- a/compat/thrust/iterator/detail/distance_from_result.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/type_traits.h>
-
-namespace thrust
-{
-
-namespace detail
-{
-
-// since both arguments are known to be specializations of iterator_facade,
-// it's legal to access IteratorFacade2::difference_type
-template<typename IteratorFacade1, typename IteratorFacade2>
-  struct distance_from_result
-    : eval_if<
-        is_convertible<IteratorFacade2,IteratorFacade1>::value,
-        identity_<typename IteratorFacade1::difference_type>,
-        identity_<typename IteratorFacade2::difference_type>
-      >
-{};
-
-} // end detail
-
-} // end thrust
-
diff --git a/compat/thrust/iterator/detail/host_system_tag.h b/compat/thrust/iterator/detail/host_system_tag.h
deleted file mode 100644
index 26d3f7d73f..0000000000
--- a/compat/thrust/iterator/detail/host_system_tag.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// #include the host system's execution_policy header
-#define __THRUST_HOST_SYSTEM_TAG_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/execution_policy.h>
-#include __THRUST_HOST_SYSTEM_TAG_HEADER
-#undef __THRUST_HOST_SYSTEM_TAG_HEADER
-
-namespace thrust
-{
-
-typedef thrust::system::__THRUST_HOST_SYSTEM_NAMESPACE::tag host_system_tag;
-
-} // end thrust
-
-// TODO remove this in 1.8.0
-namespace thrust
-{
-
-typedef THRUST_DEPRECATED host_system_tag host_space_tag;
-
-} // end thrust
-
diff --git a/compat/thrust/iterator/detail/is_iterator_category.h b/compat/thrust/iterator/detail/is_iterator_category.h
deleted file mode 100644
index 95f14d558c..0000000000
--- a/compat/thrust/iterator/detail/is_iterator_category.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/iterator/iterator_categories.h>
-#include <thrust/detail/type_traits.h>
-
-namespace thrust
-{
-
-namespace detail
-{
-
-template <typename T>
-  struct is_host_iterator_category
-    : thrust::detail::or_<
-        thrust::detail::is_convertible<T, thrust::input_host_iterator_tag>,
-        thrust::detail::is_convertible<T, thrust::output_host_iterator_tag>
-      >
-{
-}; // end is_host_iterator_category
-
-template <typename T>
-  struct is_device_iterator_category
-    : thrust::detail::or_<
-        thrust::detail::is_convertible<T, thrust::input_device_iterator_tag>,
-        thrust::detail::is_convertible<T, thrust::output_device_iterator_tag>
-      >
-{
-}; // end is_device_iterator_category
-
-
-template <typename T>
-  struct is_iterator_category
-    : thrust::detail::or_<
-        is_host_iterator_category<T>,
-        is_device_iterator_category<T>
-      >
-{
-}; // end is_iterator_category
-
-} // end detail
-
-} // end thrust
-
diff --git a/compat/thrust/iterator/detail/is_trivial_iterator.h b/compat/thrust/iterator/detail/is_trivial_iterator.h
deleted file mode 100644
index ca37e74e64..0000000000
--- a/compat/thrust/iterator/detail/is_trivial_iterator.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/type_traits/pointer_traits.h>
-
-#if __GNUC__
-// forward declaration of gnu's __normal_iterator
-namespace __gnu_cxx
-{
-
-template<typename Iterator, typename Container> class __normal_iterator;
-
-} // end __gnu_cxx
-#endif // __GNUC__
-
-#if _MSC_VER
-// forward declaration of MSVC's "normal iterators"
-namespace std
-{
-
-template<typename Value, typename Difference, typename Pointer, typename Reference> struct _Ranit;
-
-} // end std
-#endif // _MSC_VER
-
-namespace thrust
-{
-namespace detail
-{
-
-#ifdef __GNUC__
-template<typename T>
-  struct is_gnu_normal_iterator
-    : false_type
-{};
-
-
-// catch gnu __normal_iterators
-template<typename Iterator, typename Container>
-  struct is_gnu_normal_iterator< __gnu_cxx::__normal_iterator<Iterator, Container> >
-    : true_type
-{};
-#endif // __GNUC__
-
-
-#ifdef _MSC_VER
-// catch msvc _Ranit
-template<typename Iterator>
-  struct is_convertible_to_msvc_Ranit :
-    is_convertible<
-      Iterator,
-      std::_Ranit<
-        typename iterator_value<Iterator>::type,
-        typename iterator_difference<Iterator>::type,
-        typename iterator_pointer<Iterator>::type,
-        typename iterator_reference<Iterator>::type
-      >
-    >
-{};
-#endif // _MSC_VER
-
-
-template<typename T>
-  struct is_trivial_iterator :
-    integral_constant<
-      bool,
-        is_pointer<T>::value
-      | thrust::detail::is_thrust_pointer<T>::value
-#if __GNUC__
-      | is_gnu_normal_iterator<T>::value
-#endif // __GNUC__
-#ifdef _MSC_VER
-      | is_convertible_to_msvc_Ranit<T>::value
-#endif // _MSC_VER
-    >
-{};
-
-} // end detail
-} // end thrust
-
diff --git a/compat/thrust/iterator/detail/iterator_adaptor_base.h b/compat/thrust/iterator/detail/iterator_adaptor_base.h
deleted file mode 100644
index 8b77f05d81..0000000000
--- a/compat/thrust/iterator/detail/iterator_adaptor_base.h
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/type_traits.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/detail/use_default.h>
-#include <thrust/iterator/iterator_facade.h>
-
-namespace thrust
-{
-
-
-// forward declaration of iterator_adaptor for iterator_adaptor_base below
-template<typename Derived,
-         typename Base,
-         typename Value,
-         typename System,
-         typename Traversal,
-         typename Reference,
-         typename Difference
->
-class iterator_adaptor;
-
-
-namespace detail
-{
-
-// If T is use_default, return the result of invoking
-// DefaultNullaryFn, otherwise return T.
-// XXX rename to dflt_help
-template <class T, class DefaultNullaryFn>
-struct ia_dflt_help
-  : thrust::detail::eval_if<
-        thrust::detail::is_same<T, thrust::use_default>::value
-      , DefaultNullaryFn
-      , thrust::detail::identity_<T>
-    >
-{
-}; // end ia_dflt_help
-
-
-// A metafunction which computes an iterator_adaptor's base class,
-// a specialization of iterator_facade.
-template<typename Derived,
-         typename Base,
-         typename Value,
-         typename System,
-         typename Traversal,
-         typename Reference,
-         typename Difference
->
-  struct iterator_adaptor_base
-{
-  typedef typename ia_dflt_help<
-    Value,
-    iterator_value<Base>
-  >::type value;
-
-  typedef typename ia_dflt_help<
-    System,
-    thrust::iterator_system<Base>
-  >::type system;
-
-  typedef typename ia_dflt_help<
-    Traversal,
-    thrust::iterator_traversal<Base>
-  >::type traversal;
-
-  typedef typename ia_dflt_help<
-    Reference,
-    thrust::detail::eval_if<
-      thrust::detail::is_same<Value,use_default>::value,
-      thrust::iterator_reference<Base>,
-      thrust::detail::add_reference<Value>
-    >
-  >::type reference;
-
-  typedef typename ia_dflt_help<
-    Difference,
-    iterator_difference<Base>
-  >::type difference;
-
-  typedef thrust::iterator_facade<
-    Derived,
-    value,
-    system,
-    traversal,
-    reference,
-    difference
-  > type;
-}; // end iterator_adaptor_base
-
-
-} // end detail
-} // end thrust
-
diff --git a/compat/thrust/iterator/detail/iterator_category_to_system.h b/compat/thrust/iterator/detail/iterator_category_to_system.h
deleted file mode 100644
index 17e7d78c8f..0000000000
--- a/compat/thrust/iterator/detail/iterator_category_to_system.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/iterator/iterator_categories.h>
-#include <thrust/iterator/detail/iterator_traversal_tags.h>
-#include <thrust/iterator/detail/host_system_tag.h>
-#include <thrust/iterator/detail/device_system_tag.h>
-#include <thrust/iterator/detail/any_system_tag.h>
-#include <thrust/detail/type_traits.h>
-
-namespace thrust
-{
-
-// XXX WAR circular #inclusion with forward declarations
-struct random_access_universal_iterator_tag;
-struct input_universal_iterator_tag;
-struct output_universal_iterator_tag;
-
-namespace detail
-{
-
-// forward declaration
-template <typename> struct is_iterator_system;
-
-template <typename> struct device_iterator_category_to_backend_system;
-
-// XXX this should work entirely differently
-// we should just specialize this metafunction for iterator_category_with_system_and_traversal
-template<typename Category>
-  struct iterator_category_to_system
-    // convertible to any iterator?
-    : eval_if<
-        or_<
-          is_convertible<Category, thrust::input_universal_iterator_tag>,
-          is_convertible<Category, thrust::output_universal_iterator_tag>
-        >::value,
-
-        detail::identity_<thrust::any_system_tag>,
-
-        // convertible to host iterator?
-        eval_if<
-          or_<
-            is_convertible<Category, thrust::input_host_iterator_tag>,
-            is_convertible<Category, thrust::output_host_iterator_tag>
-          >::value,
-
-          detail::identity_<thrust::host_system_tag>,
-          
-          // convertible to device iterator?
-          eval_if<
-            or_<
-              is_convertible<Category, thrust::input_device_iterator_tag>,
-              is_convertible<Category, thrust::output_device_iterator_tag>
-            >::value,
-
-            detail::identity_<thrust::device_system_tag>,
-
-            // unknown system
-            detail::identity_<void>
-          > // if device
-        > // if host
-      > // if any
-{
-}; // end iterator_category_to_system
-
-
-template<typename CategoryOrTraversal>
-  struct iterator_category_or_traversal_to_system
-    : eval_if<
-        is_iterator_system<CategoryOrTraversal>::value,
-        detail::identity_<CategoryOrTraversal>,
-        iterator_category_to_system<CategoryOrTraversal>
-      >
-{
-}; // end iterator_category_or_traversal_to_system
-
-} // end detail
-} // end thrust
-
diff --git a/compat/thrust/iterator/detail/iterator_category_to_traversal.h b/compat/thrust/iterator/detail/iterator_category_to_traversal.h
deleted file mode 100644
index 04ef60c0c2..0000000000
--- a/compat/thrust/iterator/detail/iterator_category_to_traversal.h
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/iterator/iterator_categories.h>
-#include <thrust/iterator/detail/iterator_traversal_tags.h>
-#include <thrust/iterator/detail/iterator_category_to_system.h>
-#include <thrust/detail/type_traits.h>
-
-namespace thrust
-{
-
-// XXX WAR circular #inclusion with these forward declarations
-struct bidirectional_universal_iterator_tag;
-struct forward_universal_iterator_tag;
-
-namespace detail
-{
-
-// forward declarations
-template <typename> struct is_iterator_system;
-template <typename> struct is_iterator_traversal;
-
-// make type_traits easy to access
-using namespace thrust::detail;
-
-template <typename Category>
-  struct host_system_category_to_traversal
-    : eval_if<
-        is_convertible<Category, random_access_host_iterator_tag>::value,
-        detail::identity_<random_access_traversal_tag>,
-        eval_if<
-          is_convertible<Category, bidirectional_host_iterator_tag>::value,
-          detail::identity_<bidirectional_traversal_tag>,
-          eval_if<
-            is_convertible<Category, forward_host_iterator_tag>::value,
-            detail::identity_<forward_traversal_tag>,
-            eval_if<
-              is_convertible<Category, input_host_iterator_tag>::value,
-              detail::identity_<single_pass_traversal_tag>,
-              eval_if<
-                is_convertible<Category, output_host_iterator_tag>::value,
-                detail::identity_<incrementable_traversal_tag>,
-                void
-              >
-            >
-          >
-        >
-      >
-{
-}; // end host_system_category_to_traversal
-
-
-
-template <typename Category>
-  struct device_system_category_to_traversal
-    : eval_if<
-        is_convertible<Category, random_access_device_iterator_tag>::value,
-        detail::identity_<random_access_traversal_tag>,
-        eval_if<
-          is_convertible<Category, bidirectional_device_iterator_tag>::value,
-          detail::identity_<bidirectional_traversal_tag>,
-          eval_if<
-            is_convertible<Category, forward_device_iterator_tag>::value,
-            detail::identity_<forward_traversal_tag>,
-            eval_if<
-              is_convertible<Category, input_device_iterator_tag>::value,
-              detail::identity_<single_pass_traversal_tag>,
-              eval_if<
-                is_convertible<Category, output_device_iterator_tag>::value,
-                detail::identity_<incrementable_traversal_tag>,
-                void
-              >
-            >
-          >
-        >
-      >
-{
-}; // end device_system_category_to_traversal
-
-
-
-template <typename Category>
-  struct any_system_category_to_traversal
-    : eval_if<
-        is_convertible<Category, random_access_universal_iterator_tag>::value,
-        identity_<random_access_traversal_tag>,
-        eval_if<
-          is_convertible<Category, bidirectional_universal_iterator_tag>::value,
-          identity_<bidirectional_traversal_tag>,
-          eval_if<
-            is_convertible<Category, forward_universal_iterator_tag>::value,
-            identity_<forward_traversal_tag>,
-            eval_if<
-              is_convertible<Category, input_universal_iterator_tag>::value,
-              identity_<single_pass_traversal_tag>,
-              eval_if<
-                is_convertible<Category, output_universal_iterator_tag>::value,
-                identity_<incrementable_traversal_tag>,
-
-                // unknown traversal
-                void
-              >
-            >
-          >
-        >
-      >
-{
-}; // end any_system_category_to_traversal
-
-
-template<typename Category>
-  struct category_to_traversal
-      // check for any system
-    : eval_if<
-        or_<
-          is_convertible<Category, thrust::input_universal_iterator_tag>,
-          is_convertible<Category, thrust::output_universal_iterator_tag>
-        >::value,
-
-        any_system_category_to_traversal<Category>,
-
-        // check for host system
-        eval_if<
-          or_<
-            is_convertible<Category, thrust::input_host_iterator_tag>,
-            is_convertible<Category, thrust::output_host_iterator_tag>
-          >::value,
-
-          host_system_category_to_traversal<Category>,
-
-          // check for device system
-          eval_if<
-            or_<
-              is_convertible<Category, thrust::input_device_iterator_tag>,
-              is_convertible<Category, thrust::output_device_iterator_tag>
-            >::value,
-
-            device_system_category_to_traversal<Category>,
-
-            // unknown category
-            void
-          >
-        >
-      >
-{};
-
-
-template <typename CategoryOrTraversal>
-  struct iterator_category_to_traversal
-    : eval_if<
-        is_iterator_traversal<CategoryOrTraversal>::value,
-        detail::identity_<CategoryOrTraversal>,
-        category_to_traversal<CategoryOrTraversal>
-      >
-{
-}; // end iterator_category_to_traversal
-
-
-} // end detail
-
-} // end thrust
-
diff --git a/compat/thrust/iterator/detail/iterator_facade_category.h b/compat/thrust/iterator/detail/iterator_facade_category.h
deleted file mode 100644
index fbb8bd6451..0000000000
--- a/compat/thrust/iterator/detail/iterator_facade_category.h
+++ /dev/null
@@ -1,283 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/iterator/detail/host_system_tag.h>
-#include <thrust/iterator/detail/device_system_tag.h>
-#include <thrust/iterator/detail/any_system_tag.h>
-#include <thrust/iterator/iterator_categories.h>
-#include <thrust/iterator/detail/iterator_traversal_tags.h>
-#include <thrust/iterator/detail/is_iterator_category.h>
-#include <thrust/iterator/detail/iterator_category_to_traversal.h>
-
-namespace thrust
-{
-
-namespace detail
-{
-
-template<typename Category, typename System, typename Traversal>
-  struct iterator_category_with_system_and_traversal
-    : Category
-{
-}; // end iterator_category_with_system_and_traversal
-
-// specialize iterator_category_to_system for iterator_category_with_system_and_traversal
-template<typename Category> struct iterator_category_to_system;
-
-template<typename Category, typename System, typename Traversal>
-  struct iterator_category_to_system<iterator_category_with_system_and_traversal<Category,System,Traversal> >
-{
-  typedef System type;
-}; // end iterator_category_with_system_and_traversal
-
-
-// adapted from http://www.boost.org/doc/libs/1_37_0/libs/iterator/doc/iterator_facade.html#iterator-category
-//
-// in our implementation, R need not be a reference type to result in a category
-// derived from forward_XXX_iterator_tag
-//
-// iterator-category(T,V,R) :=
-//   if(T is convertible to input_host_iterator_tag
-//      || T is convertible to output_host_iterator_tag
-//      || T is convertible to input_device_iterator_tag
-//      || T is convertible to output_device_iterator_tag
-//   )
-//     return T
-//
-//   else if (T is not convertible to incrementable_traversal_tag)
-//     the program is ill-formed
-//
-//   else return a type X satisfying the following two constraints:
-//
-//     1. X is convertible to X1, and not to any more-derived
-//        type, where X1 is defined by:
-//
-//        if (T is convertible to forward_traversal_tag)
-//        {
-//          if (T is convertible to random_access_traversal_tag)
-//            X1 = random_access_host_iterator_tag
-//          else if (T is convertible to bidirectional_traversal_tag)
-//            X1 = bidirectional_host_iterator_tag
-//          else
-//            X1 = forward_host_iterator_tag
-//        }
-//        else
-//        {
-//          if (T is convertible to single_pass_traversal_tag
-//              && R is convertible to V)
-//            X1 = input_host_iterator_tag
-//          else
-//            X1 = T
-//        }
-//
-//     2. category-to-traversal(X) is convertible to the most
-//        derived traversal tag type to which X is also convertible,
-//        and not to any more-derived traversal tag type.
-
-
-template<typename System, typename Traversal, typename ValueParam, typename Reference>
-  struct iterator_facade_default_category;
-
-
-// Thrust's implementation of iterator_facade_default_category is slightly
-// different from Boost's equivalent.
-// Thrust does not check is_convertible<Reference, ValueParam> because Reference
-// may not be a complete type at this point, and implementations of is_convertible
-// typically require that both types be complete.
-// Instead, it simply assumes that if is_convertible<Traversal, single_pass_traversal_tag>,
-// then the category is input_iterator_tag
-
-
-// this is the function for standard system iterators
-template<typename Traversal, typename ValueParam, typename Reference>
-  struct iterator_facade_default_category_std :
-    thrust::detail::eval_if<
-      thrust::detail::is_convertible<Traversal, thrust::forward_traversal_tag>::value,
-      thrust::detail::eval_if<
-        thrust::detail::is_convertible<Traversal, thrust::random_access_traversal_tag>::value,
-        thrust::detail::identity_<std::random_access_iterator_tag>,
-        thrust::detail::eval_if<
-          thrust::detail::is_convertible<Traversal, thrust::bidirectional_traversal_tag>::value,
-          thrust::detail::identity_<std::bidirectional_iterator_tag>,
-          thrust::detail::identity_<std::forward_iterator_tag>
-        >
-      >,
-      thrust::detail::eval_if< // XXX note we differ from Boost here
-        thrust::detail::is_convertible<Traversal, thrust::single_pass_traversal_tag>::value,
-        thrust::detail::identity_<std::input_iterator_tag>,
-        thrust::detail::identity_<Traversal>
-      >
-    >
-{
-}; // end iterator_facade_default_category_std
-
-
-// this is the function for host system iterators
-template<typename Traversal, typename ValueParam, typename Reference>
-  struct iterator_facade_default_category_host :
-    thrust::detail::eval_if<
-      thrust::detail::is_convertible<Traversal, thrust::forward_traversal_tag>::value,
-      thrust::detail::eval_if<
-        thrust::detail::is_convertible<Traversal, thrust::random_access_traversal_tag>::value,
-        thrust::detail::identity_<thrust::random_access_host_iterator_tag>,
-        thrust::detail::eval_if<
-          thrust::detail::is_convertible<Traversal, thrust::bidirectional_traversal_tag>::value,
-          thrust::detail::identity_<thrust::bidirectional_host_iterator_tag>,
-          thrust::detail::identity_<thrust::forward_host_iterator_tag>
-        >
-      >,
-      thrust::detail::eval_if< // XXX note we differ from Boost here
-        thrust::detail::is_convertible<Traversal, thrust::single_pass_traversal_tag>::value,
-        thrust::detail::identity_<thrust::input_host_iterator_tag>,
-        thrust::detail::identity_<Traversal>
-      >
-    >
-{
-}; // end iterator_facade_default_category_host
-
-
-// this is the function for device system iterators
-template<typename Traversal, typename ValueParam, typename Reference>
-  struct iterator_facade_default_category_device :
-    thrust::detail::eval_if<
-      thrust::detail::is_convertible<Traversal, thrust::forward_traversal_tag>::value,
-      thrust::detail::eval_if<
-        thrust::detail::is_convertible<Traversal, thrust::random_access_traversal_tag>::value,
-        thrust::detail::identity_<thrust::random_access_device_iterator_tag>,
-        thrust::detail::eval_if<
-          thrust::detail::is_convertible<Traversal, thrust::bidirectional_traversal_tag>::value,
-          thrust::detail::identity_<thrust::bidirectional_device_iterator_tag>,
-          thrust::detail::identity_<thrust::forward_device_iterator_tag>
-        >
-      >,
-      thrust::detail::eval_if<
-        thrust::detail::is_convertible<Traversal, thrust::single_pass_traversal_tag>::value, // XXX note we differ from Boost here
-        thrust::detail::identity_<thrust::input_device_iterator_tag>,
-        thrust::detail::identity_<Traversal>
-      >
-    >
-{
-}; // end iterator_facade_default_category_device
-
-
-// this is the function for any system iterators
-template<typename Traversal, typename ValueParam, typename Reference>
-  struct iterator_facade_default_category_any :
-    thrust::detail::eval_if<
-
-      thrust::detail::is_convertible<Traversal, thrust::forward_traversal_tag>::value,
-
-      thrust::detail::eval_if<
-        thrust::detail::is_convertible<Traversal, thrust::random_access_traversal_tag>::value,
-        thrust::detail::identity_<thrust::random_access_universal_iterator_tag>,
-
-        thrust::detail::eval_if<
-          thrust::detail::is_convertible<Traversal, thrust::bidirectional_traversal_tag>::value,
-          thrust::detail::identity_<thrust::bidirectional_universal_iterator_tag>,
-          thrust::detail::identity_<thrust::forward_universal_iterator_tag>
-        >
-      >,
-
-      thrust::detail::eval_if<
-        thrust::detail::is_convertible<Traversal, thrust::single_pass_traversal_tag>::value, // XXX note we differ from Boost here
-        thrust::detail::identity_<thrust::input_universal_iterator_tag>,
-        thrust::detail::identity_<Traversal>
-      >
-    >
-{
-}; // end iterator_facade_default_category_any
-
-
-template<typename System, typename Traversal, typename ValueParam, typename Reference>
-  struct iterator_facade_default_category
-      // check for any system
-    : thrust::detail::eval_if<
-        thrust::detail::is_convertible<System, thrust::any_system_tag>::value,
-        iterator_facade_default_category_any<Traversal, ValueParam, Reference>,
-
-        // check for host system
-        thrust::detail::eval_if<
-          thrust::detail::is_convertible<System, thrust::host_system_tag>::value,
-          iterator_facade_default_category_host<Traversal, ValueParam, Reference>,
-
-          // check for device system
-          thrust::detail::eval_if<
-            thrust::detail::is_convertible<System, thrust::device_system_tag>::value,
-            iterator_facade_default_category_device<Traversal, ValueParam, Reference>,
-
-            // if we don't recognize the system, get a standard iterator category
-            // and combine it with System & Traversal
-            thrust::detail::identity_<
-              thrust::detail::iterator_category_with_system_and_traversal<
-                typename iterator_facade_default_category_std<Traversal, ValueParam, Reference>::type,
-                System,
-                Traversal
-              >
-            >
-          >
-        >
-      >
-{};
-
-
-template<typename System, typename Traversal, typename ValueParam, typename Reference>
-  struct iterator_facade_category_impl
-{
-  typedef typename iterator_facade_default_category<
-    System,Traversal,ValueParam,Reference
-  >::type category;
-
-  // we must be able to deduce both Traversal & System from category
-  // otherwise, munge them all together
-  typedef typename thrust::detail::eval_if<
-    thrust::detail::and_<
-      thrust::detail::is_same<
-        Traversal,
-        typename thrust::detail::iterator_category_to_traversal<category>::type
-      >,
-      thrust::detail::is_same<
-        System,
-        typename thrust::detail::iterator_category_to_system<category>::type
-      >
-    >::value,
-    thrust::detail::identity_<category>,
-    thrust::detail::identity_<thrust::detail::iterator_category_with_system_and_traversal<category,System,Traversal> >
-  >::type type;
-}; // end iterator_facade_category_impl
-
-
-template<typename CategoryOrSystem,
-         typename CategoryOrTraversal,
-         typename ValueParam,
-         typename Reference>
-  struct iterator_facade_category
-{
-  typedef typename
-  thrust::detail::eval_if<
-    thrust::detail::is_iterator_category<CategoryOrTraversal>::value,
-    thrust::detail::identity_<CategoryOrTraversal>, // categories are fine as-is
-    iterator_facade_category_impl<CategoryOrSystem, CategoryOrTraversal, ValueParam, Reference>
-  >::type type;
-}; // end iterator_facade_category
-
-
-} // end detail
-} // end thrust
-
diff --git a/compat/thrust/iterator/detail/iterator_traits.inl b/compat/thrust/iterator/detail/iterator_traits.inl
deleted file mode 100644
index 924eabb187..0000000000
--- a/compat/thrust/iterator/detail/iterator_traits.inl
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file iterator_traits.inl
- *  \brief Inline file for iterator_traits.h.
- */
-
-#include <thrust/iterator/iterator_categories.h>
-#include <thrust/iterator/detail/iterator_category_to_traversal.h>
-#include <thrust/detail/type_traits.h>
-
-namespace thrust
-{
-
-template<typename Iterator>
-  struct iterator_value
-{
-  typedef typename thrust::iterator_traits<Iterator>::value_type type;
-}; // end iterator_value
-
-
-template<typename Iterator>
-  struct iterator_pointer
-{
-  typedef typename thrust::iterator_traits<Iterator>::pointer type;
-}; // end iterator_pointer
-
-
-template<typename Iterator>
-  struct iterator_reference
-{
-  typedef typename iterator_traits<Iterator>::reference type;
-}; // end iterator_reference
-
-
-template<typename Iterator>
-  struct iterator_difference
-{
-  typedef typename thrust::iterator_traits<Iterator>::difference_type type;
-}; // end iterator_difference
-
-
-template<typename Iterator>
-  struct iterator_system
-    : detail::iterator_category_to_system<
-        typename thrust::iterator_traits<Iterator>::iterator_category
-      >
-{
-}; // end iterator_system
-
-// specialize iterator_system for void *, which has no category
-template<>
-  struct iterator_system<void *>
-{
-  typedef thrust::iterator_system<int*>::type type;
-}; // end iterator_system<void*>
-
-template<>
-  struct iterator_system<const void *>
-{
-  typedef thrust::iterator_system<const int*>::type type;
-}; // end iterator_system<void*>
-
-
-template <typename Iterator>
-  struct iterator_traversal
-    : detail::iterator_category_to_traversal<
-        typename thrust::iterator_traits<Iterator>::iterator_category
-      >
-{
-}; // end iterator_traversal
-
-namespace detail
-{
-
-template <typename T>
-  struct is_iterator_traversal
-    : thrust::detail::is_convertible<T, incrementable_traversal_tag>
-{
-}; // end is_iterator_traversal
-
-
-template<typename T>
-  struct is_iterator_system
-    : detail::or_<
-        detail::is_convertible<T, any_system_tag>,
-        detail::or_<
-          detail::is_convertible<T, host_system_tag>,
-          detail::is_convertible<T, device_system_tag>
-        >
-      >
-{
-}; // end is_iterator_system
-
-
-} // end namespace detail
-} // end namespace thrust
-
diff --git a/compat/thrust/iterator/detail/iterator_traversal_tags.h b/compat/thrust/iterator/detail/iterator_traversal_tags.h
deleted file mode 100644
index dcbebf3fda..0000000000
--- a/compat/thrust/iterator/detail/iterator_traversal_tags.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-namespace thrust
-{
-
-// define Boost's traversal tags
-struct no_traversal_tag {};
-
-struct incrementable_traversal_tag
-  : no_traversal_tag {};
-
-struct single_pass_traversal_tag
-  : incrementable_traversal_tag {};
-
-struct forward_traversal_tag
-  : single_pass_traversal_tag {};
-
-struct bidirectional_traversal_tag
-  : forward_traversal_tag {};
-
-struct random_access_traversal_tag
-  : bidirectional_traversal_tag {};
-
-} // end thrust
-
diff --git a/compat/thrust/iterator/detail/minimum_category.h b/compat/thrust/iterator/detail/minimum_category.h
deleted file mode 100644
index e07e09636e..0000000000
--- a/compat/thrust/iterator/detail/minimum_category.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/type_traits/minimum_type.h>
-
-namespace thrust
-{
-
-namespace detail
-{ 
-
-template<typename T1,
-         typename T2  = minimum_type_detail::any_conversion,
-         typename T3  = minimum_type_detail::any_conversion,
-         typename T4  = minimum_type_detail::any_conversion,
-         typename T5  = minimum_type_detail::any_conversion,
-         typename T6  = minimum_type_detail::any_conversion,
-         typename T7  = minimum_type_detail::any_conversion,
-         typename T8  = minimum_type_detail::any_conversion,
-         typename T9  = minimum_type_detail::any_conversion,
-         typename T10 = minimum_type_detail::any_conversion,
-         typename T11 = minimum_type_detail::any_conversion,
-         typename T12 = minimum_type_detail::any_conversion,
-         typename T13 = minimum_type_detail::any_conversion,
-         typename T14 = minimum_type_detail::any_conversion,
-         typename T15 = minimum_type_detail::any_conversion,
-         typename T16 = minimum_type_detail::any_conversion>
-  struct minimum_category
-    : minimum_type<T1,T2,T3,T4,T5,T6,T7,T8,T9,T10,T11,T12,T13,T14,T15,T16>
-{
-}; // end minimum_category
-
-} // end detail
-
-} // end thrust
-
-
diff --git a/compat/thrust/iterator/detail/minimum_system.h b/compat/thrust/iterator/detail/minimum_system.h
deleted file mode 100644
index 5448a0d1f0..0000000000
--- a/compat/thrust/iterator/detail/minimum_system.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/type_traits/minimum_type.h>
-
-namespace thrust
-{
-namespace detail
-{ 
-
-template<typename T1,
-         typename T2  = minimum_type_detail::any_conversion,
-         typename T3  = minimum_type_detail::any_conversion,
-         typename T4  = minimum_type_detail::any_conversion,
-         typename T5  = minimum_type_detail::any_conversion,
-         typename T6  = minimum_type_detail::any_conversion,
-         typename T7  = minimum_type_detail::any_conversion,
-         typename T8  = minimum_type_detail::any_conversion,
-         typename T9  = minimum_type_detail::any_conversion,
-         typename T10 = minimum_type_detail::any_conversion,
-         typename T11 = minimum_type_detail::any_conversion,
-         typename T12 = minimum_type_detail::any_conversion,
-         typename T13 = minimum_type_detail::any_conversion,
-         typename T14 = minimum_type_detail::any_conversion,
-         typename T15 = minimum_type_detail::any_conversion,
-         typename T16 = minimum_type_detail::any_conversion>
-  struct minimum_system
-    : minimum_type<T1,T2,T3,T4,T5,T6,T7,T8,T9,T10,T11,T12,T13,T14,T15,T16>
-{
-}; // end minimum_system
-
-} // end detail
-} // end thrust
-
diff --git a/compat/thrust/iterator/detail/normal_iterator.h b/compat/thrust/iterator/detail/normal_iterator.h
deleted file mode 100644
index 7fe61bfed3..0000000000
--- a/compat/thrust/iterator/detail/normal_iterator.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file normal_iterator.h
- *  \brief Defines the interface to an iterator class
- *         which adapts a pointer type.
- */
-
-#pragma once
-
-#include <thrust/iterator/iterator_adaptor.h>
-#include <thrust/iterator/detail/is_trivial_iterator.h>
-#include <thrust/detail/type_traits.h>
-
-namespace thrust
-{
-namespace detail
-{
-
-
-template<typename Pointer>
-  class normal_iterator
-    : public iterator_adaptor<
-        normal_iterator<Pointer>,
-        Pointer
-      >
-{
-  typedef iterator_adaptor<normal_iterator<Pointer>, Pointer> super_t;
-
-  public:
-    __host__ __device__
-    normal_iterator() {}
-
-    __host__ __device__
-    normal_iterator(Pointer p)
-      : super_t(p) {}
-    
-    template<typename OtherPointer>
-    __host__ __device__
-    normal_iterator(const normal_iterator<OtherPointer> &other,
-                    typename thrust::detail::enable_if_convertible<
-                      OtherPointer,
-                      Pointer
-                    >::type * = 0)
-      : super_t(other.base()) {}
-
-}; // end normal_iterator
-
-
-template<typename Pointer>
-  inline __host__ __device__ normal_iterator<Pointer> make_normal_iterator(Pointer ptr)
-{
-  return normal_iterator<Pointer>(ptr);
-}
-
-
-template<typename T> struct is_trivial_iterator< normal_iterator<T> > : public true_type {};
-
-
-} // end detail
-} // end thrust
-
diff --git a/compat/thrust/iterator/detail/permutation_iterator_base.h b/compat/thrust/iterator/detail/permutation_iterator_base.h
deleted file mode 100644
index a145b88aec..0000000000
--- a/compat/thrust/iterator/detail/permutation_iterator_base.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/iterator/iterator_adaptor.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/iterator/detail/minimum_system.h>
-
-namespace thrust
-{
-
-template<typename,typename> class permutation_iterator;
-
-
-namespace detail
-{
-
-template<typename ElementIterator,
-         typename IndexIterator>
-  struct permutation_iterator_base
-{
-  typedef typename thrust::iterator_system<ElementIterator>::type System1;
-  typedef typename thrust::iterator_system<IndexIterator>::type System2;
-
-  typedef thrust::iterator_adaptor<
-    permutation_iterator<ElementIterator,IndexIterator>,
-    IndexIterator,
-    typename thrust::iterator_value<ElementIterator>::type,
-    typename detail::minimum_system<System1,System2>::type,
-    thrust::use_default,
-    typename thrust::iterator_reference<ElementIterator>::type
-  > type;
-}; // end permutation_iterator_base
-
-} // end detail
-
-} // end thrust
-
diff --git a/compat/thrust/iterator/detail/retag.h b/compat/thrust/iterator/detail/retag.h
deleted file mode 100644
index 4417fa5604..0000000000
--- a/compat/thrust/iterator/detail/retag.h
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/iterator/detail/tagged_iterator.h>
-#include <thrust/detail/pointer.h>
-
-namespace thrust
-{
-namespace detail
-{
-
-
-// we can retag an iterator if FromTag converts to ToTag
-// or vice versa
-template<typename FromTag, typename ToTag>
-  struct is_retaggable
-    : integral_constant<
-        bool,
-        (is_convertible<FromTag,ToTag>::value || is_convertible<ToTag,FromTag>::value)
-      >
-{};
-
-
-template<typename FromTag, typename ToTag, typename Result>
-  struct enable_if_retaggable
-    : enable_if<
-        is_retaggable<FromTag,ToTag>::value,
-        Result
-      >
-{}; // end enable_if_retaggable
-
-
-} // end detail
-
-
-template<typename Tag, typename Iterator>
-  thrust::detail::tagged_iterator<Iterator,Tag>
-    reinterpret_tag(Iterator iter)
-{
-  return thrust::detail::tagged_iterator<Iterator,Tag>(iter);
-} // end reinterpret_tag()
-
-
-// specialization for raw pointer
-template<typename Tag, typename T>
-  thrust::pointer<T,Tag>
-    reinterpret_tag(T *ptr)
-{
-  return thrust::pointer<T,Tag>(ptr);
-} // end reinterpret_tag()
-
-
-// specialization for thrust::pointer
-template<typename Tag, typename T, typename OtherTag, typename Reference, typename Derived>
-  thrust::pointer<T,Tag>
-    reinterpret_tag(thrust::pointer<T,OtherTag,Reference,Derived> ptr)
-{
-  return reinterpret_tag<Tag>(ptr.get());
-} // end reinterpret_tag()
-
-
-// avoid deeply-nested tagged_iterator
-template<typename Tag, typename BaseIterator, typename OtherTag>
-  thrust::detail::tagged_iterator<BaseIterator,Tag>
-    reinterpret_tag(thrust::detail::tagged_iterator<BaseIterator,OtherTag> iter)
-{
-  return reinterpret_tag<Tag>(iter.base());
-} // end reinterpret_tag()
-
-
-template<typename Tag, typename Iterator>
-  typename thrust::detail::enable_if_retaggable<
-    typename thrust::iterator_system<Iterator>::type,
-    Tag,
-    thrust::detail::tagged_iterator<Iterator,Tag>
-  >::type
-    retag(Iterator iter)
-{
-  return reinterpret_tag<Tag>(iter);
-} // end retag()
-
-
-// specialization for raw pointer
-template<typename Tag, typename T>
-  typename thrust::detail::enable_if_retaggable<
-    typename thrust::iterator_system<T*>::type,
-    Tag,
-    thrust::pointer<T,Tag>
-  >::type
-    retag(T *ptr)
-{
-  return reinterpret_tag<Tag>(ptr);
-} // end retag()
-
-
-// specialization for thrust::pointer
-template<typename Tag, typename T, typename OtherTag>
-  typename thrust::detail::enable_if_retaggable<
-    OtherTag,
-    Tag,
-    thrust::pointer<T,Tag>
-  >::type
-    retag(thrust::pointer<T,OtherTag> ptr)
-{
-  return reinterpret_tag<Tag>(ptr);
-} // end retag()
-
-
-// avoid deeply-nested tagged_iterator
-template<typename Tag, typename BaseIterator, typename OtherTag>
-  typename thrust::detail::enable_if_retaggable<
-    OtherTag,
-    Tag,
-    thrust::detail::tagged_iterator<BaseIterator,Tag>
-  >::type
-    retag(thrust::detail::tagged_iterator<BaseIterator,OtherTag> iter)
-{
-  return reinterpret_tag<Tag>(iter);
-} // end retag()
-
-
-} // end thrust
-
diff --git a/compat/thrust/iterator/detail/reverse_iterator.inl b/compat/thrust/iterator/detail/reverse_iterator.inl
deleted file mode 100644
index 03e9032130..0000000000
--- a/compat/thrust/iterator/detail/reverse_iterator.inl
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/iterator/reverse_iterator.h>
-#include <thrust/iterator/iterator_traits.h>
-
-namespace thrust
-{
-
-namespace detail
-{
-
-__thrust_hd_warning_disable__
-template<typename Iterator>
-__host__ __device__
-  Iterator prior(Iterator x)
-{
-  return --x;
-} // end prior()
-
-} // end detail
-
-template<typename BidirectionalIterator>
-  reverse_iterator<BidirectionalIterator>
-    ::reverse_iterator(BidirectionalIterator x)
-      :super_t(x)
-{
-} // end reverse_iterator::reverse_iterator()
-
-template<typename BidirectionalIterator>
-  template<typename OtherBidirectionalIterator>
-    reverse_iterator<BidirectionalIterator>
-      ::reverse_iterator(reverse_iterator<OtherBidirectionalIterator> const &r
-// XXX msvc screws this up
-#ifndef _MSC_VER
-                     , typename thrust::detail::enable_if<
-                         thrust::detail::is_convertible<
-                           OtherBidirectionalIterator,
-                           BidirectionalIterator
-                         >::value
-                       >::type *
-#endif // _MSC_VER
-                     )
-        :super_t(r.base())
-{
-} // end reverse_iterator::reverse_iterator()
-
-template<typename BidirectionalIterator>
-  typename reverse_iterator<BidirectionalIterator>::super_t::reference
-    reverse_iterator<BidirectionalIterator>
-      ::dereference(void) const
-{
-  return *thrust::detail::prior(this->base());
-} // end reverse_iterator::increment()
-
-template<typename BidirectionalIterator>
-  void reverse_iterator<BidirectionalIterator>
-    ::increment(void)
-{
-  --this->base_reference();
-} // end reverse_iterator::increment()
-
-template<typename BidirectionalIterator>
-  void reverse_iterator<BidirectionalIterator>
-    ::decrement(void)
-{
-  ++this->base_reference();
-} // end reverse_iterator::decrement()
-
-template<typename BidirectionalIterator>
-  void reverse_iterator<BidirectionalIterator>
-    ::advance(typename super_t::difference_type n)
-{
-  this->base_reference() += -n;
-} // end reverse_iterator::advance()
-
-template<typename BidirectionalIterator>
-  template<typename OtherBidirectionalIterator>
-    typename reverse_iterator<BidirectionalIterator>::super_t::difference_type
-      reverse_iterator<BidirectionalIterator>
-        ::distance_to(reverse_iterator<OtherBidirectionalIterator> const &y) const
-{
-  return this->base_reference() - y.base();
-} // end reverse_iterator::distance_to()
-
-template<typename BidirectionalIterator>
-__host__ __device__
-reverse_iterator<BidirectionalIterator> make_reverse_iterator(BidirectionalIterator x)
-{
-  return reverse_iterator<BidirectionalIterator>(x);
-} // end make_reverse_iterator()
-
-
-} // end thrust
-
diff --git a/compat/thrust/iterator/detail/reverse_iterator_base.h b/compat/thrust/iterator/detail/reverse_iterator_base.h
deleted file mode 100644
index c10c5b73ff..0000000000
--- a/compat/thrust/iterator/detail/reverse_iterator_base.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/iterator/iterator_adaptor.h>
-#include <thrust/iterator/iterator_traits.h>
-
-namespace thrust
-{
-
-template <typename> class reverse_iterator;
-
-namespace detail
-{
-
-template<typename BidirectionalIterator>
-  struct reverse_iterator_base
-{
-  typedef thrust::iterator_adaptor<
-    thrust::reverse_iterator<BidirectionalIterator>,
-    BidirectionalIterator
-  > type;
-}; // end reverse_iterator_base
-
-} // end detail
-
-} // end thrust
-
diff --git a/compat/thrust/iterator/detail/tagged_iterator.h b/compat/thrust/iterator/detail/tagged_iterator.h
deleted file mode 100644
index 69e6445183..0000000000
--- a/compat/thrust/iterator/detail/tagged_iterator.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/iterator/iterator_adaptor.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/detail/use_default.h>
-
-namespace thrust
-{
-namespace detail
-{
-
-template <typename,typename> class tagged_iterator;
-
-template<typename Iterator, typename Tag>
-  struct tagged_iterator_base
-{
-  typedef thrust::iterator_adaptor<
-    tagged_iterator<Iterator,Tag>,
-    Iterator,
-    typename thrust::iterator_value<Iterator>::type,
-    Tag,
-    typename thrust::iterator_traversal<Iterator>::type,
-    typename thrust::iterator_reference<Iterator>::type,
-    typename thrust::iterator_difference<Iterator>::type
-  > type;
-}; // end tagged_iterator_base
-
-template<typename Iterator, typename Tag>
-  class tagged_iterator
-    : public tagged_iterator_base<Iterator,Tag>::type
-{
-  private:
-    typedef typename tagged_iterator_base<Iterator,Tag>::type super_t;
-
-  public:
-    __host__ __device__
-    tagged_iterator(void) {}
-
-    __host__ __device__
-    explicit tagged_iterator(Iterator x)
-      : super_t(x) {}
-}; // end tagged_iterator
-
-
-// specialize is_trivial_iterator for tagged_iterator
-template<typename> struct is_trivial_iterator;
-
-// tagged_iterator is trivial if its base iterator is
-template<typename BaseIterator, typename Tag>
-  struct is_trivial_iterator<tagged_iterator<BaseIterator,Tag> >
-    : is_trivial_iterator<BaseIterator>
-{};
-
-
-} // end detail
-} // end thrust
-
diff --git a/compat/thrust/iterator/detail/transform_iterator.inl b/compat/thrust/iterator/detail/transform_iterator.inl
deleted file mode 100644
index a5a36a78be..0000000000
--- a/compat/thrust/iterator/detail/transform_iterator.inl
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/iterator/transform_iterator.h>
-#include <thrust/iterator/iterator_adaptor.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/detail/type_traits/result_of.h>
-
-namespace thrust
-{
-
-template <class UnaryFunction, class Iterator, class Reference, class Value>
-  class transform_iterator;
-  
-namespace detail 
-{
-
-// Compute the iterator_adaptor instantiation to be used for transform_iterator
-template <class UnaryFunc, class Iterator, class Reference, class Value>
-struct transform_iterator_base
-{
- private:
-    // By default, dereferencing the iterator yields the same as the function.
-    typedef typename thrust::detail::ia_dflt_help<
-      Reference,
-      thrust::detail::result_of<UnaryFunc(typename thrust::iterator_value<Iterator>::type)>
-    >::type reference;
-
-    // To get the default for Value: remove any reference on the
-    // result type, but retain any constness to signal
-    // non-writability.  Note that if we adopt Thomas' suggestion
-    // to key non-writability *only* on the Reference argument,
-    // we'd need to strip constness here as well.
-    typedef typename thrust::detail::ia_dflt_help<
-      Value,
-      thrust::detail::remove_reference<reference>
-    >::type cv_value_type;
-
- public:
-    typedef thrust::iterator_adaptor
-    <
-        transform_iterator<UnaryFunc, Iterator, Reference, Value>
-      , Iterator
-      , cv_value_type
-      , thrust::use_default   // Leave the system alone
-        //, thrust::use_default   // Leave the traversal alone
-        // use the Iterator's category to let any system iterators remain random access even though
-        // transform_iterator's reference type may not be a reference
-        // XXX figure out why only iterators whose reference types are true references are random access
-        , typename thrust::iterator_traits<Iterator>::iterator_category
-      , reference
-    > type;
-};
-
-
-} // end detail
-} // end thrust
-
diff --git a/compat/thrust/iterator/detail/tuple_of_iterator_references.h b/compat/thrust/iterator/detail/tuple_of_iterator_references.h
deleted file mode 100644
index fdbf6b8f66..0000000000
--- a/compat/thrust/iterator/detail/tuple_of_iterator_references.h
+++ /dev/null
@@ -1,246 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/tuple.h>
-#include <thrust/pair.h>
-#include <thrust/detail/reference_forward_declaration.h>
-
-namespace thrust
-{
-namespace detail
-{
-
-  
-template<
-  typename T0, typename T1, typename T2,
-  typename T3, typename T4, typename T5,
-  typename T6, typename T7, typename T8,
-  typename T9
->
-  class tuple_of_iterator_references
-    : public thrust::tuple<T0,T1,T2,T3,T4,T5,T6,T7,T8,T9>
-{
-  private:
-    typedef thrust::tuple<T0,T1,T2,T3,T4,T5,T6,T7,T8,T9> super_t;
-
-  public:
-    // allow implicit construction from tuple<refs>
-    inline __host__ __device__
-    tuple_of_iterator_references(const super_t &other)
-      : super_t(other)
-    {}
-
-    // allow assignment from tuples
-    // XXX might be worthwhile to guard this with an enable_if is_assignable
-    template<typename U1, typename U2>
-    inline __host__ __device__
-    tuple_of_iterator_references &operator=(const detail::cons<U1,U2> &other)
-    {
-      super_t::operator=(other);
-      return *this;
-    }
-
-    // allow assignment from pairs
-    // XXX might be worthwhile to guard this with an enable_if is_assignable
-    template<typename U1, typename U2>
-    inline __host__ __device__
-    tuple_of_iterator_references &operator=(const thrust::pair<U1,U2> &other)
-    {
-      super_t::operator=(other);
-      return *this;
-    }
-
-    // allow assignment from reference<tuple>
-    // XXX perhaps we should generalize to reference<T>
-    //     we could captures reference<pair> this way
-    template<typename U0, typename U1, typename U2,
-             typename U3, typename U4, typename U5,
-             typename U6, typename U7, typename U8,
-             typename U9,
-             typename Pointer, typename Derived>
-    inline __host__ __device__
-// XXX gcc-4.2 crashes on is_assignable
-//    typename thrust::detail::enable_if<
-//      thrust::detail::is_assignable<
-//        super_t,
-//        const thrust::tuple<U0,U1,U2,U3,U4,U5,U6,U7,U8,U9>
-//      >::value,
-//      tuple_of_iterator_references &
-//    >::type
-    tuple_of_iterator_references &
-    operator=(const thrust::reference<thrust::tuple<U0,U1,U2,U3,U4,U5,U6,U7,U8,U9>, Pointer, Derived> &other)
-    {
-      typedef thrust::tuple<U0,U1,U2,U3,U4,U5,U6,U7,U8,U9> tuple_type;
-
-      // XXX perhaps this could be accelerated
-      tuple_type other_tuple = other;
-      super_t::operator=(other_tuple);
-      return *this;
-    }
-
-
-    // duplicate thrust::tuple's constructors
-    inline __host__ __device__
-    tuple_of_iterator_references() {}
-
-    inline __host__ __device__ 
-    tuple_of_iterator_references(typename access_traits<T0>::parameter_type t0)
-      : super_t(t0,
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()))
-    {}
-
-    inline __host__ __device__ 
-    tuple_of_iterator_references(typename access_traits<T0>::parameter_type t0,
-                                 typename access_traits<T1>::parameter_type t1)
-      : super_t(t0, t1,
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()))
-    {}
-
-    inline __host__ __device__ 
-    tuple_of_iterator_references(typename access_traits<T0>::parameter_type t0,
-                                 typename access_traits<T1>::parameter_type t1,
-                                 typename access_traits<T2>::parameter_type t2)
-      : super_t(t0, t1, t2,
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()))
-    {}
-
-    inline __host__ __device__ 
-    tuple_of_iterator_references(typename access_traits<T0>::parameter_type t0,
-                                 typename access_traits<T1>::parameter_type t1,
-                                 typename access_traits<T2>::parameter_type t2,
-                                 typename access_traits<T3>::parameter_type t3)
-      : super_t(t0, t1, t2, t3,
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()))
-    {}
-
-    inline __host__ __device__ 
-    tuple_of_iterator_references(typename access_traits<T0>::parameter_type t0,
-                                 typename access_traits<T1>::parameter_type t1,
-                                 typename access_traits<T2>::parameter_type t2,
-                                 typename access_traits<T3>::parameter_type t3,
-                                 typename access_traits<T4>::parameter_type t4)
-      : super_t(t0, t1, t2, t3, t4,
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()))
-    {}
-
-    inline __host__ __device__ 
-    tuple_of_iterator_references(typename access_traits<T0>::parameter_type t0,
-                                 typename access_traits<T1>::parameter_type t1,
-                                 typename access_traits<T2>::parameter_type t2,
-                                 typename access_traits<T3>::parameter_type t3,
-                                 typename access_traits<T4>::parameter_type t4,
-                                 typename access_traits<T5>::parameter_type t5)
-      : super_t(t0, t1, t2, t3, t4, t5,
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()))
-    {}
-
-    inline __host__ __device__ 
-    tuple_of_iterator_references(typename access_traits<T0>::parameter_type t0,
-                                 typename access_traits<T1>::parameter_type t1,
-                                 typename access_traits<T2>::parameter_type t2,
-                                 typename access_traits<T3>::parameter_type t3,
-                                 typename access_traits<T4>::parameter_type t4,
-                                 typename access_traits<T5>::parameter_type t5,
-                                 typename access_traits<T6>::parameter_type t6)
-      : super_t(t0, t1, t2, t3, t4, t5, t6,
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()))
-    {}
-
-    inline __host__ __device__ 
-    tuple_of_iterator_references(typename access_traits<T0>::parameter_type t0,
-                                 typename access_traits<T1>::parameter_type t1,
-                                 typename access_traits<T2>::parameter_type t2,
-                                 typename access_traits<T3>::parameter_type t3,
-                                 typename access_traits<T4>::parameter_type t4,
-                                 typename access_traits<T5>::parameter_type t5,
-                                 typename access_traits<T6>::parameter_type t6,
-                                 typename access_traits<T7>::parameter_type t7)
-      : super_t(t0, t1, t2, t3, t4, t5, t6, t7,
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()))
-    {}
-
-    inline __host__ __device__ 
-    tuple_of_iterator_references(typename access_traits<T0>::parameter_type t0,
-                                 typename access_traits<T1>::parameter_type t1,
-                                 typename access_traits<T2>::parameter_type t2,
-                                 typename access_traits<T3>::parameter_type t3,
-                                 typename access_traits<T4>::parameter_type t4,
-                                 typename access_traits<T5>::parameter_type t5,
-                                 typename access_traits<T6>::parameter_type t6,
-                                 typename access_traits<T7>::parameter_type t7,
-                                 typename access_traits<T8>::parameter_type t8)
-      : super_t(t0, t1, t2, t3, t4, t5, t6, t7, t8,
-                static_cast<const null_type&>(null_type()))
-    {}
-
-    inline __host__ __device__ 
-    tuple_of_iterator_references(typename access_traits<T0>::parameter_type t0,
-                                 typename access_traits<T1>::parameter_type t1,
-                                 typename access_traits<T2>::parameter_type t2,
-                                 typename access_traits<T3>::parameter_type t3,
-                                 typename access_traits<T4>::parameter_type t4,
-                                 typename access_traits<T5>::parameter_type t5,
-                                 typename access_traits<T6>::parameter_type t6,
-                                 typename access_traits<T7>::parameter_type t7,
-                                 typename access_traits<T8>::parameter_type t8,
-                                 typename access_traits<T9>::parameter_type t9)
-      : super_t(t0, t1, t2, t3, t4, t5, t6, t7, t8, t9)
-    {}
-};
-
-
-} // end detail
-} // end thrust
-
diff --git a/compat/thrust/iterator/detail/universal_categories.h b/compat/thrust/iterator/detail/universal_categories.h
deleted file mode 100644
index 7c3922210c..0000000000
--- a/compat/thrust/iterator/detail/universal_categories.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/iterator/iterator_categories.h>
-
-namespace thrust
-{
-
-// define these types without inheritance to avoid ambiguous conversion to base classes
-
-struct input_universal_iterator_tag
-{
-  operator input_host_iterator_tag () {return input_host_iterator_tag();}
-
-  operator input_device_iterator_tag () {return input_device_iterator_tag();}
-};
-
-struct output_universal_iterator_tag
-{
-  operator output_host_iterator_tag () {return output_host_iterator_tag();}
-
-  operator output_device_iterator_tag () {return output_device_iterator_tag();}
-};
-
-struct forward_universal_iterator_tag
-  : input_universal_iterator_tag
-{
-  operator forward_host_iterator_tag () {return forward_host_iterator_tag();};
-
-  operator forward_device_iterator_tag () {return forward_device_iterator_tag();};
-};
-
-struct bidirectional_universal_iterator_tag
-  : forward_universal_iterator_tag
-{
-  operator bidirectional_host_iterator_tag () {return bidirectional_host_iterator_tag();};
-
-  operator bidirectional_device_iterator_tag () {return bidirectional_device_iterator_tag();};
-};
-
-
-namespace detail
-{
-
-// create this struct to control conversion precedence in random_access_universal_iterator_tag
-template<typename T>
-struct one_degree_of_separation
-  : T
-{
-};
-
-} // end detail
-
-
-struct random_access_universal_iterator_tag
-{
-  // these conversions are all P0
-  operator random_access_host_iterator_tag () {return random_access_host_iterator_tag();};
-
-  operator random_access_device_iterator_tag () {return random_access_device_iterator_tag();};
-
-  // bidirectional_universal_iterator_tag is P1
-  operator detail::one_degree_of_separation<bidirectional_universal_iterator_tag> () {return detail::one_degree_of_separation<bidirectional_universal_iterator_tag>();}
-
-};
-
-
-} // end thrust
-
diff --git a/compat/thrust/iterator/detail/zip_iterator.inl b/compat/thrust/iterator/detail/zip_iterator.inl
deleted file mode 100644
index fddd0ada11..0000000000
--- a/compat/thrust/iterator/detail/zip_iterator.inl
+++ /dev/null
@@ -1,151 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/iterator/zip_iterator.h>
-#include <thrust/detail/tuple_transform.h>
-
-namespace thrust
-{
-
-
-template <typename IteratorTuple>
-  zip_iterator<IteratorTuple>
-    ::zip_iterator(void)
-{
-} // end zip_iterator::zip_iterator()
-
-
-template <typename IteratorTuple>
-  zip_iterator<IteratorTuple>
-    ::zip_iterator(IteratorTuple iterator_tuple)
-      :m_iterator_tuple(iterator_tuple)
-{
-} // end zip_iterator::zip_iterator()
-
-
-template <typename IteratorTuple>
-  template <typename OtherIteratorTuple>
-    zip_iterator<IteratorTuple>
-      ::zip_iterator(const zip_iterator<OtherIteratorTuple> &other,
-                     typename thrust::detail::enable_if_convertible<
-                       OtherIteratorTuple,
-                       IteratorTuple
-                     >::type *)
-        :m_iterator_tuple(other.get_iterator_tuple())
-{
-} // end zip_iterator::zip_iterator()
-
-
-template <typename IteratorTuple>
-const IteratorTuple &zip_iterator<IteratorTuple>
-  ::get_iterator_tuple(void) const
-{
-  return m_iterator_tuple;
-} // end zip_iterator::get_iterator_tuple()
-
-
-template <typename IteratorTuple>
-  typename zip_iterator<IteratorTuple>::super_t::reference
-    zip_iterator<IteratorTuple>
-      ::dereference(void) const
-{
-  using namespace detail::tuple_impl_specific;
-
-  return thrust::detail::tuple_host_device_transform<detail::dereference_iterator::template apply>(get_iterator_tuple(), detail::dereference_iterator());
-} // end zip_iterator::dereference()
-
-
-__thrust_hd_warning_disable__
-template <typename IteratorTuple>
-  template <typename OtherIteratorTuple>
-    bool zip_iterator<IteratorTuple>
-      ::equal(const zip_iterator<OtherIteratorTuple> &other) const
-{
-  return get<0>(get_iterator_tuple()) == get<0>(other.get_iterator_tuple());
-} // end zip_iterator::equal()
-
-
-template <typename IteratorTuple>
-  void zip_iterator<IteratorTuple>
-    ::advance(typename super_t::difference_type n)
-{
-  using namespace detail::tuple_impl_specific;
-
-  // XXX note that we use a pointer to System to dispatch to avoid
-  //     default construction of a System
-  typename thrust::iterator_system<zip_iterator>::type *use_me_to_dispatch = 0;
-
-  // dispatch on system
-  tuple_for_each(m_iterator_tuple,
-                 detail::advance_iterator<typename super_t::difference_type>(n),
-                 use_me_to_dispatch);
-} // end zip_iterator::advance()
-
-
-template <typename IteratorTuple>
-  void zip_iterator<IteratorTuple>
-    ::increment(void)
-{
-  using namespace detail::tuple_impl_specific;
-
-  // XXX note that we use a pointer to System to dispatch to avoid
-  //     default construction of a System
-  typename thrust::iterator_system<zip_iterator>::type *use_me_to_dispatch = 0;
-
-  // dispatch on system
-  tuple_for_each(m_iterator_tuple, detail::increment_iterator(),
-                 use_me_to_dispatch);
-} // end zip_iterator::increment()
-
-
-template <typename IteratorTuple>
-  void zip_iterator<IteratorTuple>
-    ::decrement(void)
-{
-  using namespace detail::tuple_impl_specific;
-
-  // XXX note that we use a pointer to System to dispatch to avoid
-  //     default construction of a System
-  typename thrust::iterator_system<zip_iterator>::type *use_me_to_dispatch = 0;
-
-  // dispatch on system
-  tuple_for_each(m_iterator_tuple, detail::decrement_iterator(),
-                 use_me_to_dispatch);
-} // end zip_iterator::decrement()
-
-
-__thrust_hd_warning_disable__
-template <typename IteratorTuple>
-  template <typename OtherIteratorTuple>
-    typename zip_iterator<IteratorTuple>::super_t::difference_type
-      zip_iterator<IteratorTuple>
-        ::distance_to(const zip_iterator<OtherIteratorTuple> &other) const
-{
-  return get<0>(other.get_iterator_tuple()) - get<0>(get_iterator_tuple());
-} // end zip_iterator::distance_to()
-
-
-template <typename IteratorTuple>
-  zip_iterator<IteratorTuple> make_zip_iterator(IteratorTuple t)
-{
-  return zip_iterator<IteratorTuple>(t);
-} // end make_zip_iterator()
-
-
-} // end thrust
-
diff --git a/compat/thrust/iterator/detail/zip_iterator_base.h b/compat/thrust/iterator/detail/zip_iterator_base.h
deleted file mode 100644
index 9dd7789e52..0000000000
--- a/compat/thrust/iterator/detail/zip_iterator_base.h
+++ /dev/null
@@ -1,418 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/iterator/iterator_facade.h>
-#include <thrust/iterator/iterator_categories.h>
-#include <thrust/iterator/detail/minimum_category.h>
-#include <thrust/iterator/detail/minimum_system.h>
-#include <thrust/tuple.h>
-#include <thrust/detail/tuple_meta_transform.h>
-#include <thrust/detail/tuple_transform.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/iterator/detail/tuple_of_iterator_references.h>
-
-namespace thrust
-{
-
-// forward declare zip_iterator for zip_iterator_base
-template<typename IteratorTuple> class zip_iterator;
-
-namespace detail
-{
-
-
-// Functors to be used with tuple algorithms
-//
-template<typename DiffType>
-class advance_iterator
-{
-public:
-  inline __host__ __device__
-  advance_iterator(DiffType step) : m_step(step) {}
-  
-  template<typename Iterator>
-  inline __host__ __device__
-  void operator()(Iterator& it) const
-  { it += m_step; }
-
-private:
-  DiffType m_step;
-}; // end advance_iterator
-
-
-struct increment_iterator
-{
-  template<typename Iterator>
-  inline __host__ __device__
-  void operator()(Iterator& it)
-  { ++it; }
-}; // end increment_iterator
-
-
-struct decrement_iterator
-{
-  template<typename Iterator>
-  inline __host__ __device__
-  void operator()(Iterator& it)
-  { --it; }
-}; // end decrement_iterator
-
-
-struct dereference_iterator
-{
-  template<typename Iterator>
-  struct apply
-  { 
-    typedef typename
-      iterator_traits<Iterator>::reference
-    type;
-  }; // end apply
-
-  // XXX silence warnings of the form "calling a __host__ function from a __host__ __device__ function is not allowed
-  __thrust_hd_warning_disable__
-  template<typename Iterator>
-  __host__ __device__
-    typename apply<Iterator>::type operator()(Iterator const& it)
-  {
-    return *it;
-  }
-}; // end dereference_iterator
-
-
-// The namespace tuple_impl_specific provides two meta-
-// algorithms and two algorithms for tuples.
-namespace tuple_impl_specific
-{
-
-// define apply1 for tuple_meta_transform_impl
-template<typename UnaryMetaFunctionClass, class Arg>
-  struct apply1
-    : UnaryMetaFunctionClass::template apply<Arg>
-{
-}; // end apply1
-
-
-// define apply2 for tuple_meta_accumulate_impl
-template<typename UnaryMetaFunctionClass, class Arg1, class Arg2>
-  struct apply2
-    : UnaryMetaFunctionClass::template apply<Arg1,Arg2>
-{
-}; // end apply2
-
-
-// Meta-accumulate algorithm for tuples. Note: The template 
-// parameter StartType corresponds to the initial value in 
-// ordinary accumulation.
-//
-template<class Tuple, class BinaryMetaFun, class StartType>
-  struct tuple_meta_accumulate;
-
-template<
-    typename Tuple
-  , class BinaryMetaFun
-  , typename StartType
->
-  struct tuple_meta_accumulate_impl
-{
-   typedef typename apply2<
-       BinaryMetaFun
-     , typename Tuple::head_type
-     , typename tuple_meta_accumulate<
-           typename Tuple::tail_type
-         , BinaryMetaFun
-         , StartType 
-       >::type
-   >::type type;
-};
-
-
-template<
-    typename Tuple
-  , class BinaryMetaFun
-  , typename StartType
->
-struct tuple_meta_accumulate
-  : thrust::detail::eval_if<
-        thrust::detail::is_same<Tuple, thrust::null_type>::value
-      , thrust::detail::identity_<StartType>
-      , tuple_meta_accumulate_impl<
-            Tuple
-          , BinaryMetaFun
-          , StartType
-        >
-    > // end eval_if
-{
-}; // end tuple_meta_accumulate
-
-
-// transform algorithm for tuples. The template parameter Fun
-// must be a unary functor which is also a unary metafunction
-// class that computes its return type based on its argument
-// type. For example:
-//
-// struct to_ptr
-// {
-//     template <class Arg>
-//     struct apply
-//     {
-//          typedef Arg* type;
-//     }
-//
-//     template <class Arg>
-//     Arg* operator()(Arg x);
-// };
-
-
-
-// for_each algorithm for tuples.
-//
-template<typename Fun, typename System>
-inline __host__ __device__
-Fun tuple_for_each(thrust::null_type, Fun f, System *)
-{
-  return f;
-} // end tuple_for_each()
-
-
-template<typename Tuple, typename Fun, typename System>
-inline __host__ __device__
-Fun tuple_for_each(Tuple& t, Fun f, System *dispatch_tag)
-{ 
-  f( t.get_head() );
-  return tuple_for_each(t.get_tail(), f, dispatch_tag);
-} // end tuple_for_each()
-
-
-template<typename Tuple, typename Fun>
-inline __host__ __device__
-Fun tuple_for_each(Tuple& t, Fun f, thrust::host_system_tag *dispatch_tag)
-{ 
-// XXX this path is required in order to accomodate pure host iterators
-//     (such as std::vector::iterator) in a zip_iterator
-#ifndef __CUDA_ARCH__
-  f( t.get_head() );
-  return tuple_for_each(t.get_tail(), f, dispatch_tag);
-#else
-  // this code will never be called
-  return f;
-#endif
-} // end tuple_for_each()
-
-
-// Equality of tuples. NOTE: "==" for tuples currently (7/2003)
-// has problems under some compilers, so I just do my own.
-// No point in bringing in a bunch of #ifdefs here. This is
-// going to go away with the next tuple implementation anyway.
-//
-__host__ __device__
-inline bool tuple_equal(thrust::null_type, thrust::null_type)
-{ return true; }
-
-
-template<typename Tuple1, typename Tuple2>
-__host__ __device__
-bool tuple_equal(Tuple1 const& t1, Tuple2 const& t2)
-{ 
-  return t1.get_head() == t2.get_head() && 
-  tuple_equal(t1.get_tail(), t2.get_tail());
-} // end tuple_equal()
-
-} // end end tuple_impl_specific
-
-
-// Metafunction to obtain the type of the tuple whose element types
-// are the value_types of an iterator tupel.
-//
-template<typename IteratorTuple>
-  struct tuple_of_value_types
-    : tuple_meta_transform<
-          IteratorTuple,
-          iterator_value
-        >
-{
-}; // end tuple_of_value_types
-
-
-struct minimum_category_lambda
-{
-  template<typename T1, typename T2>
-    struct apply : minimum_category<T1,T2>
-  {};
-};
-
-
-
-// Metafunction to obtain the minimal traversal tag in a tuple
-// of iterators.
-//
-template<typename IteratorTuple>
-struct minimum_traversal_category_in_iterator_tuple
-{
-  typedef typename tuple_meta_transform<
-      IteratorTuple
-    , thrust::iterator_traversal
-  >::type tuple_of_traversal_tags;
-      
-  typedef typename tuple_impl_specific::tuple_meta_accumulate<
-      tuple_of_traversal_tags
-    , minimum_category_lambda
-    , thrust::random_access_traversal_tag
-  >::type type;
-};
-
-
-struct minimum_system_lambda
-{
-  template<typename T1, typename T2>
-    struct apply : minimum_system<T1,T2>
-  {};
-};
-
-
-
-// Metafunction to obtain the minimal system tag in a tuple
-// of iterators.
-template<typename IteratorTuple>
-struct minimum_system_in_iterator_tuple
-{
-  typedef typename thrust::detail::tuple_meta_transform<
-    IteratorTuple,
-    thrust::iterator_system
-  >::type tuple_of_system_tags;
-
-  typedef typename tuple_impl_specific::tuple_meta_accumulate<
-    tuple_of_system_tags,
-    minimum_system_lambda,
-    thrust::any_system_tag
-  >::type type;
-};
-
-namespace zip_iterator_base_ns
-{
-
-
-template<int i, typename Tuple>
-  struct tuple_elements_helper
-    : eval_if<
-        (i < tuple_size<Tuple>::value),
-        tuple_element<i,Tuple>,
-        identity_<thrust::null_type>
-      >
-{};
-
-
-template<typename Tuple>
-  struct tuple_elements
-{
-  typedef typename tuple_elements_helper<0,Tuple>::type T0;
-  typedef typename tuple_elements_helper<1,Tuple>::type T1;
-  typedef typename tuple_elements_helper<2,Tuple>::type T2;
-  typedef typename tuple_elements_helper<3,Tuple>::type T3;
-  typedef typename tuple_elements_helper<4,Tuple>::type T4;
-  typedef typename tuple_elements_helper<5,Tuple>::type T5;
-  typedef typename tuple_elements_helper<6,Tuple>::type T6;
-  typedef typename tuple_elements_helper<7,Tuple>::type T7;
-  typedef typename tuple_elements_helper<8,Tuple>::type T8;
-  typedef typename tuple_elements_helper<9,Tuple>::type T9;
-};
-
-
-template<typename IteratorTuple>
-  struct tuple_of_iterator_references
-{
-  // get a thrust::tuple of the iterators' references
-  typedef typename tuple_meta_transform<
-    IteratorTuple,
-    iterator_reference
-  >::type tuple_of_references;
-
-  // get at the individual tuple element types by name
-  typedef tuple_elements<tuple_of_references> elements;
-
-  // map thrust::tuple<T...> to tuple_of_iterator_references<T...>
-  typedef thrust::detail::tuple_of_iterator_references<
-    typename elements::T0,
-    typename elements::T1,
-    typename elements::T2,
-    typename elements::T3,
-    typename elements::T4,
-    typename elements::T5,
-    typename elements::T6,
-    typename elements::T7,
-    typename elements::T8,
-    typename elements::T9
-  > type;
-};
-
-
-} // end zip_iterator_base_ns
-
-///////////////////////////////////////////////////////////////////
-//
-// Class zip_iterator_base
-//
-// Builds and exposes the iterator facade type from which the zip 
-// iterator will be derived.
-//
-template<typename IteratorTuple>
-  struct zip_iterator_base
-{
- //private:
-    // reference type is the type of the tuple obtained from the
-    // iterators' reference types.
-    typedef typename zip_iterator_base_ns::tuple_of_iterator_references<IteratorTuple>::type reference;
-
-    // Boost's Value type is the same as reference type.
-    //typedef reference value_type;
-    typedef typename tuple_of_value_types<IteratorTuple>::type value_type;
-
-    // Difference type is the first iterator's difference type
-    typedef typename thrust::iterator_traits<
-      typename thrust::tuple_element<0, IteratorTuple>::type
-    >::difference_type difference_type;
-
-    // Iterator system is the minimum system tag in the
-    // iterator tuple
-    typedef typename
-    minimum_system_in_iterator_tuple<IteratorTuple>::type system;
-
-    // Traversal category is the minimum traversal category in the
-    // iterator tuple
-    typedef typename
-    minimum_traversal_category_in_iterator_tuple<IteratorTuple>::type traversal_category;
-  
- public:
-  
-    // The iterator facade type from which the zip iterator will
-    // be derived.
-    typedef thrust::iterator_facade<
-        zip_iterator<IteratorTuple>,
-        value_type,  
-        system,
-        traversal_category,
-        reference,
-        difference_type
-    > type;
-}; // end zip_iterator_base
-
-} // end detail
-
-} // end thrust
-
-
diff --git a/compat/thrust/iterator/discard_iterator.h b/compat/thrust/iterator/discard_iterator.h
deleted file mode 100644
index 6e089b567e..0000000000
--- a/compat/thrust/iterator/discard_iterator.h
+++ /dev/null
@@ -1,171 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file thrust/iterator/discard_iterator.h
- *  \brief An iterator which "discards" (ignores) values assigned to it upon dereference
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/iterator/detail/discard_iterator_base.h>
-#include <thrust/iterator/iterator_facade.h>
-
-__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_BEGIN
-
-namespace thrust
-{
-
-/*! \addtogroup iterators
- *  \{
- */
-
-/*! \addtogroup fancyiterator Fancy Iterators
- *  \ingroup iterators
- *  \{
- */
-
-/*! \p discard_iterator is an iterator which represents a special kind of pointer that
- *  ignores values written to it upon dereference. This iterator is useful for ignoring
- *  the output of certain algorithms without wasting memory capacity or bandwidth.
- *  \p discard_iterator may also be used to count the size of an algorithm's output which
- *  may not be known a priori.
- *
- *  The following code snippet demonstrates how to use \p discard_iterator to ignore
- *  ignore one of the output ranges of reduce_by_key
- *
- *  \code
- *  #include <thrust/iterator/discard_iterator.h>
- *  #include <thrust/reduce.h>
- *  #include <thrust/device_vector.h>
- *
- *  int main(void)
- *  {
- *    thrust::device_vector<int> keys(7), values(7);
- *
- *    keys[0] = 1;
- *    keys[1] = 3;
- *    keys[2] = 3;
- *    keys[3] = 3;
- *    keys[4] = 2;
- *    keys[5] = 2;
- *    keys[6] = 1;
- *
- *    values[0] = 9;
- *    values[1] = 8;
- *    values[2] = 7;
- *    values[3] = 6;
- *    values[4] = 5;
- *    values[5] = 4;
- *    values[6] = 3;
- *
- *    thrust::device_vector<int> result(4);
- *
- *    // we are only interested in the reduced values
- *    // use discard_iterator to ignore the output keys
- *    thrust::reduce_by_key(keys.begin(), keys.end(),
- *                          values.begin(), values.end(),
- *                          thrust::make_discard_iterator(),
- *                          result.begin());
- *    
- *    // result is now [9, 21, 9, 3]
- *    
- *    return 0;
- *  }
- *  \endcode
- *
- *  \see make_discard_iterator
- */
-template<typename System = use_default>
-  class discard_iterator
-    : public detail::discard_iterator_base<System>::type
-{
-    /*! \cond
-     */
-    friend class thrust::iterator_core_access;
-    typedef typename detail::discard_iterator_base<System>::type          super_t;
-    typedef typename detail::discard_iterator_base<System>::incrementable incrementable;
-    typedef typename detail::discard_iterator_base<System>::base_iterator base_iterator;
-
-  public:
-    typedef typename super_t::reference  reference;
-    typedef typename super_t::value_type value_type;
-
-    /*! \endcond
-     */
-
-    /*! Copy constructor copies from a source discard_iterator.
-     *
-     *  \p rhs The discard_iterator to copy.
-     */
-    __host__ __device__
-    discard_iterator(discard_iterator const &rhs)
-      : super_t(rhs.base()) {}
-
-    /*! This constructor receives an optional index specifying the position of this
-     *  \p discard_iterator in a range.
-     *  
-     *  \p i The index of this \p discard_iterator in a range. Defaults to the
-     *       value returned by \c Incrementable's null constructor. For example,
-     *       when <tt>Incrementable == int</tt>, \c 0.
-     */
-    __host__ __device__
-    discard_iterator(incrementable const &i = incrementable())
-      : super_t(base_iterator(i)) {}
-
-    /*! \cond
-     */
-  
-  private: // Core iterator interface
-    __host__ __device__
-    reference dereference(void) const
-    {
-      return m_element;
-    }
-
-    mutable value_type m_element;
-
-    /*! \endcond
-     */
-}; // end constant_iterator
-
-
-/*! \p make_discard_iterator creates a \p discard_iterator from an optional index parameter.
- *
- *  \param i The index of the returned \p discard_iterator within a range.
- *           In the default case, the value of this parameter is \c 0.
- *
- *  \return A new \p discard_iterator with index as given by \p i.
- *
- *  \see constant_iterator
- */
-inline __host__ __device__
-discard_iterator<> make_discard_iterator(discard_iterator<>::difference_type i = discard_iterator<>::difference_type(0))
-{
-  return discard_iterator<>(i);
-} // end make_discard_iterator()
-
-/*! \} // end fancyiterators
- */
-
-/*! \} // end iterators
- */
-
-} // end namespace thrust
-  
-__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_END
-
diff --git a/compat/thrust/iterator/iterator_adaptor.h b/compat/thrust/iterator/iterator_adaptor.h
deleted file mode 100644
index 7b9cca308a..0000000000
--- a/compat/thrust/iterator/iterator_adaptor.h
+++ /dev/null
@@ -1,239 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file thrust/iterator/iterator_adaptor.h
- *  \brief An iterator which adapts a base iterator
- */
-
-/*
- * (C) Copyright David Abrahams 2002.
- * (C) Copyright Jeremy Siek    2002.
- * (C) Copyright Thomas Witt    2002.
- * 
- * Distributed under the Boost Software License, Version 1.0.
- * (See accompanying NOTICE file for the complete license)
- *
- * For more information, see http://www.boost.org
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/iterator/iterator_facade.h>
-#include <thrust/detail/use_default.h>
-#include <thrust/iterator/detail/iterator_adaptor_base.h>
-
-namespace thrust
-{
-
-/*! \addtogroup iterators
- *  \{
- */
-
-/*! \addtogroup fancyiterator Fancy Iterators
- *  \ingroup iterators
- *  \{
- */
-
-/*! \p iterator_adaptor is an iterator which adapts an existing type of iterator to create a new type of
- *  iterator. Most of Thrust's fancy iterators are defined via inheritance from \p iterator_adaptor.
- *  While composition of these existing Thrust iterators is often sufficient for expressing the desired
- *  functionality, it is occasionally more straightforward to derive from \p iterator_adaptor directly.
- *
- *  To see how to use \p iterator_adaptor to create a novel iterator type, let's examine how to use it to
- *  define \p repeat_iterator, a fancy iterator which repeats elements from another range a given number of time:
- *
- *  \code
- *  #include <thrust/iterator/iterator_adaptor.h>
- *
- *  // derive repeat_iterator from iterator_adaptor
- *  template<typename Iterator>
- *    class repeat_iterator
- *      : public thrust::iterator_adaptor<
- *          repeat_iterator<Iterator>, // the first template parameter is the name of the iterator we're creating
- *          Iterator                   // the second template parameter is the name of the iterator we're adapting
- *                                     // we can use the default for the additional template parameters
- *        >
- *  {
- *    public:
- *      // shorthand for the name of the iterator_adaptor we're deriving from
- *      typedef thrust::iterator_adaptor<
- *        repeat_iterator<Iterator>,
- *        Iterator
- *      > super_t;
- *
- *      __host__ __device__
- *      repeat_iterator(const Iterator &x, int n) : super_t(x), begin(x), n(n) {}
- *
- *      // befriend thrust::iterator_core_access to allow it access to the private interface below
- *      friend class thrust::iterator_core_access;
- *
- *    private:
- *      // repeat each element of the adapted range n times
- *      unsigned int n;
- *
- *      // used to keep track of where we began
- *      const Iterator begin;
- *
- *      // it is private because only thrust::iterator_core_access needs access to it
- *      __host__ __device__
- *      typename super_t::reference dereference() const
- *      {
- *        return *(begin + (this->base() - begin) / n);
- *      }
- *  };
- *  \endcode
- *
- *  Except for the first two, \p iterator_adaptor's template parameters are optional. When omitted, or when the
- *  user specifies \p thrust::use_default in its place, \p iterator_adaptor will use a default type inferred from \p Base.
- *
- *  \p iterator_adaptor's functionality is derived from and generally equivalent to \p boost::iterator_adaptor.
- *  The exception is Thrust's addition of the template parameter \p System, which is necessary to allow Thrust
- *  to dispatch an algorithm to one of several parallel backend systems.
- *
- *  \p iterator_adaptor is a powerful tool for creating custom iterators directly. However, the large set of iterator semantics which must be satisfied
- *  for algorithm compatibility can make \p iterator_adaptor difficult to use correctly. Unless you require the full expressivity of \p iterator_adaptor,
- *  consider building a custom iterator through composition of existing higher-level fancy iterators instead. 
- *
- *  Interested users may refer to <tt>boost::iterator_adaptor</tt>'s documentation for further usage examples.
- */
-template<typename Derived,
-         typename Base,
-         typename Value      = use_default,
-         typename System     = use_default,
-         typename Traversal  = use_default,
-         typename Reference  = use_default,
-         typename Difference = use_default>
-  class iterator_adaptor:
-    public detail::iterator_adaptor_base<
-      Derived, Base, Value, System, Traversal, Reference, Difference
-    >::type
-{
-  /*! \cond
-   */
-
-    friend class thrust::iterator_core_access;
-
-  protected:
-    typedef typename detail::iterator_adaptor_base<
-        Derived, Base, Value, System, Traversal, Reference, Difference
-    >::type super_t;
-
-  /*! \endcond
-   */
-  
-  public:
-    /*! \p iterator_adaptor's default constructor does nothing.
-     */
-    __host__ __device__
-    iterator_adaptor(){}
-
-    /*! This constructor copies from a given instance of the \p Base iterator.
-     */
-    __host__ __device__
-    explicit iterator_adaptor(Base const& iter)
-      : m_iterator(iter)
-    {}
-
-    /*! The type of iterator this \p iterator_adaptor's \p adapts.
-     */
-    typedef Base       base_type;
-                                                                                              
-    /*! \cond
-     */
-    typedef typename super_t::reference reference;
-                                                                                              
-    typedef typename super_t::difference_type difference_type;
-    /*! \endcond
-     */
-
-    /*! \return A \p const reference to the \p Base iterator this \p iterator_adaptor adapts.
-     */
-    __host__ __device__
-    Base const& base() const
-    { return m_iterator; }
-
-  protected:
-    /*! \return A \p const reference to the \p Base iterator this \p iterator_adaptor adapts.
-     */
-    __host__ __device__
-    Base const& base_reference() const
-    { return m_iterator; }
-
-    /*! \return A mutable reference to the \p Base iterator this \p iterator_adaptor adapts.
-     */
-    __host__ __device__
-    Base& base_reference()
-    { return m_iterator; }
-
-    /*! \cond
-     */
-  private: // Core iterator interface for iterator_facade
-
-    __thrust_hd_warning_disable__
-    __host__ __device__
-    typename iterator_adaptor::reference dereference() const
-    { return *m_iterator; }
-
-    __thrust_hd_warning_disable__
-    template<typename OtherDerived, typename OtherIterator, typename V, typename S, typename T, typename R, typename D>
-    __host__ __device__
-    bool equal(iterator_adaptor<OtherDerived, OtherIterator, V, S, T, R, D> const& x) const
-    { return m_iterator == x.base(); }
-
-    __thrust_hd_warning_disable__
-    __host__ __device__
-    void advance(typename iterator_adaptor::difference_type n)
-    {
-      // XXX statically assert on random_access_traversal_tag
-      m_iterator += n;
-    }
-
-    __thrust_hd_warning_disable__
-    __host__ __device__
-    void increment()
-    { ++m_iterator; }
-
-    __thrust_hd_warning_disable__
-    __host__ __device__
-    void decrement()
-    {
-      // XXX statically assert on bidirectional_traversal_tag
-      --m_iterator;
-    }
-
-    __thrust_hd_warning_disable__
-    template<typename OtherDerived, typename OtherIterator, typename V, typename S, typename T, typename R, typename D>
-    __host__ __device__
-    typename iterator_adaptor::difference_type distance_to(iterator_adaptor<OtherDerived, OtherIterator, V, S, T, R, D> const& y) const
-    { return y.base() - m_iterator; }
-
-  private:
-    Base m_iterator;
-
-    /*! \endcond
-     */
-}; // end iterator_adaptor
-
-/*! \} // end fancyiterators
- */
-
-/*! \} // end iterators
- */
-
-} // end thrust
-
diff --git a/compat/thrust/iterator/iterator_categories.h b/compat/thrust/iterator/iterator_categories.h
deleted file mode 100644
index 81601b4a40..0000000000
--- a/compat/thrust/iterator/iterator_categories.h
+++ /dev/null
@@ -1,191 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file thrust/iterator/iterator_categories.h
- *  \brief Types for reasoning about the categories of iterators
- */
-
-/*
- * (C) Copyright Jeremy Siek 2002.
- * 
- * Distributed under the Boost Software License, Version 1.0.
- * (See accompanying NOTICE file for the complete license)
- *
- * For more information, see http://www.boost.org
- */
-
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// #include this for stl's iterator tags
-#include <iterator>
-
-namespace thrust
-{
-
-/*! \addtogroup iterators
- *  \addtogroup iterator_tags Iterator Tags
- *  \ingroup iterators
- *  \addtogroup iterator_tag_classes Iterator Tag Classes
- *  \ingroup iterator_tags
- *  \{
- */
-
-/*! \p input_device_iterator_tag is an empty class: it has no member functions,
- *  member variables, or nested types. It is used solely as a "tag": a
- *  representation of the Input Device Iterator concept within the C++ type
- *  system.
- *
- *  \see http://www.sgi.com/tech/sgi/input_iterator_tag.html, iterator_traits,
- *  output_device_iterator_tag, forward_device_iterator_tag,
- *  bidirectional_device_iterator_tag, random_access_device_iterator_tag,
- *  input_host_iterator_tag, output_host_iterator_tag, forward_host_iterator_tag,
- *  bidirectional_host_iterator_tag, random_access_host_iterator_tag
- */
-struct input_device_iterator_tag {};
-
-/*! \p output_device_iterator_tag is an empty class: it has no member functions,
- *  member variables, or nested types. It is used solely as a "tag": a
- *  representation of the Output Device Iterator concept within the C++ type
- *  system.
- *
- *  \see http://www.sgi.com/tech/sgi/output_iterator_tag.html, iterator_traits,
- *  input_device_iterator_tag, forward_device_iterator_tag,
- *  bidirectional_device_iterator_tag, random_access_device_iterator_tag,
- *  input_host_iterator_tag, output_host_iterator_tag, forward_host_iterator_tag,
- *  bidirectional_host_iterator_tag, random_access_host_iterator_tag
- */
-struct output_device_iterator_tag {};
-
-/*! \p forward_device_iterator_tag is an empty class: it has no member functions,
- *  member variables, or nested types. It is used solely as a "tag": a
- *  representation of the Forward Device Iterator concept within the C++ type
- *  system.
- *
- *  \see http://www.sgi.com/tech/sgi/forward_iterator_tag.html, iterator_traits,
- *  input_device_iterator_tag, output_device_iterator_tag,
- *  bidirectional_device_iterator_tag, random_access_device_iterator_tag,
- *  input_host_iterator_tag, output_host_iterator_tag, forward_host_iterator_tag,
- *  bidirectional_host_iterator_tag, random_access_host_iterator_tag
- */
-struct forward_device_iterator_tag : public input_device_iterator_tag {};
-
-/*! \p bidirectional_device_iterator_tag is an empty class: it has no member
- *  functions, member variables, or nested types. It is used solely as a "tag": a
- *  representation of the Bidirectional Device Iterator concept within the C++
- *  type system.
- *
- *  \see http://www.sgi.com/tech/sgi/bidirectional_iterator_tag.html,
- *  iterator_traits, input_device_iterator_tag, output_device_iterator_tag,
- *  forward_device_iterator_tag, random_access_device_iterator_tag,
- *  input_host_iterator_tag, output_host_iterator_tag, forward_host_iterator_tag,
- *  bidirectional_host_iterator_tag, random_access_host_iterator_tag
- */
-struct bidirectional_device_iterator_tag : public forward_device_iterator_tag {};
-
-/*! \p random_access_device_iterator_tag is an empty class: it has no member
- *  functions, member variables, or nested types. It is used solely as a "tag": a
- *  representation of the Random Access Device Iterator concept within the C++
- *  type system.
- *
- *  \see http://www.sgi.com/tech/sgi/random_access_iterator_tag.html,
- *  iterator_traits, input_device_iterator_tag, output_device_iterator_tag,
- *  forward_device_iterator_tag, bidirectional_device_iterator_tag,
- *  input_host_iterator_tag, output_host_iterator_tag, forward_host_iterator_tag,
- *  bidirectional_host_iterator_tag, random_access_host_iterator_tag
- */
-struct random_access_device_iterator_tag : public bidirectional_device_iterator_tag {};
-
-/*! \p input_host_iterator_tag is an empty class: it has no member
- *  functions, member variables, or nested types. It is used solely as a "tag": a
- *  representation of the Input Host Iterator concept within the C++
- *  type system.
- *
- *  \see http://www.sgi.com/tech/sgi/input_iterator_tag.html,
- *  iterator_traits, input_device_iterator_tag, output_device_iterator_tag,
- *  forward_device_iterator_tag, bidirectional_device_iterator_tag,
- *  random_access_device_iterator_tag,
- *  output_host_iterator_tag, forward_host_iterator_tag,
- *  bidirectional_host_iterator_tag, random_access_host_iterator_tag
- */
-typedef std::input_iterator_tag input_host_iterator_tag;
-
-/*! \p output_host_iterator_tag is an empty class: it has no member
- *  functions, member variables, or nested types. It is used solely as a "tag": a
- *  representation of the Output Host Iterator concept within the C++
- *  type system.
- *
- *  \see http://www.sgi.com/tech/sgi/output_iterator_tag.html,
- *  iterator_traits, input_device_iterator_tag, output_device_iterator_tag,
- *  forward_device_iterator_tag, bidirectional_device_iterator_tag,
- *  random_access_device_iterator_tag,
- *  input_host_iterator_tag, forward_host_iterator_tag,
- *  bidirectional_host_iterator_tag, random_access_host_iterator_tag
- */
-typedef std::output_iterator_tag output_host_iterator_tag;
-
-/*! \p forward_host_iterator_tag is an empty class: it has no member
- *  functions, member variables, or nested types. It is used solely as a "tag": a
- *  representation of the Forward Host Iterator concept within the C++
- *  type system.
- *
- *  \see http://www.sgi.com/tech/sgi/forward_iterator_tag.html,
- *  iterator_traits, input_device_iterator_tag, output_device_iterator_tag,
- *  forward_device_iterator_tag, bidirectional_device_iterator_tag,
- *  random_access_device_iterator_tag,
- *  input_host_iterator_tag, output_host_iterator_tag,
- *  bidirectional_host_iterator_tag, random_access_host_iterator_tag
- */
-typedef std::forward_iterator_tag forward_host_iterator_tag;
-
-/*! \p bidirectional_host_iterator_tag is an empty class: it has no member
- *  functions, member variables, or nested types. It is used solely as a "tag": a
- *  representation of the Forward Host Iterator concept within the C++
- *  type system.
- *
- *  \see http://www.sgi.com/tech/sgi/bidirectional_iterator_tag.html,
- *  iterator_traits, input_device_iterator_tag, output_device_iterator_tag,
- *  forward_device_iterator_tag, bidirectional_device_iterator_tag,
- *  random_access_device_iterator_tag,
- *  input_host_iterator_tag, output_host_iterator_tag,
- *  forward_host_iterator_tag, random_access_host_iterator_tag
- */
-typedef std::bidirectional_iterator_tag bidirectional_host_iterator_tag;
-
-/*! \p random_access_host_iterator_tag is an empty class: it has no member
- *  functions, member variables, or nested types. It is used solely as a "tag": a
- *  representation of the Forward Host Iterator concept within the C++
- *  type system.
- *
- *  \see http://www.sgi.com/tech/sgi/random_access_iterator_tag.html,
- *  iterator_traits, input_device_iterator_tag, output_device_iterator_tag,
- *  forward_device_iterator_tag, bidirectional_device_iterator_tag,
- *  random_access_device_iterator_tag,
- *  input_host_iterator_tag, output_host_iterator_tag,
- *  forward_host_iterator_tag, bidirectional_host_iterator_tag
- */
-typedef std::random_access_iterator_tag random_access_host_iterator_tag;
-
-/*! \} // end iterator_tag_classes
- */
-
-} // end namespace thrust
-
-#include <thrust/iterator/detail/universal_categories.h>
-
diff --git a/compat/thrust/iterator/iterator_facade.h b/compat/thrust/iterator/iterator_facade.h
deleted file mode 100644
index 232c150a6f..0000000000
--- a/compat/thrust/iterator/iterator_facade.h
+++ /dev/null
@@ -1,538 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-/*! \file thrust/iterator/iterator_facade.h
- *  \brief A class which exposes a public interface for iterators
- */
-
-/*
- * (C) Copyright David Abrahams 2002.
- * (C) Copyright Jeremy Siek    2002.
- * (C) Copyright Thomas Witt    2002.
- * 
- * Distributed under the Boost Software License, Version 1.0.
- * (See accompanying NOTICE file for the complete license)
- *
- * For more information, see http://www.boost.org
- */
-
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/iterator/detail/iterator_facade_category.h>
-#include <thrust/iterator/detail/distance_from_result.h>
-
-namespace thrust
-{
-
-/*! \addtogroup iterators
- *  \{
- */
-
-/*! \addtogroup fancyiterator Fancy Iterators
- *  \ingroup iterators
- *  \{
- */
-
-
-// This forward declaration is required for the friend declaration
-// in iterator_core_access
-template<typename Derived, typename Value, typename System, typename Traversal, typename Reference, typename Difference> class iterator_facade;
-
-
-/*! \p iterator_core_access is the class which user iterator types derived from \p thrust::iterator_adaptor
- *  or \p thrust::iterator_facade must befriend to allow it to access their private interface.
- */
-class iterator_core_access
-{
-    /*! \cond
-     */
-
-    // declare our friends
-    template<typename Derived, typename Value, typename System, typename Traversal, typename Reference, typename Difference> friend class iterator_facade;
-
-    // iterator comparisons are our friends
-    template <typename Derived1, typename Value1, typename System1, typename Traversal1, typename Reference1, typename Difference1,
-              typename Derived2, typename Value2, typename System2, typename Traversal2, typename Reference2, typename Difference2>
-    inline __host__ __device__
-    friend bool
-    operator ==(iterator_facade<Derived1,Value1,System1,Traversal1,Reference1,Difference1> const& lhs,
-                iterator_facade<Derived2,Value2,System2,Traversal2,Reference2,Difference2> const& rhs);
-
-    template <typename Derived1, typename Value1, typename System1, typename Traversal1, typename Reference1, typename Difference1,
-              typename Derived2, typename Value2, typename System2, typename Traversal2, typename Reference2, typename Difference2>
-    inline __host__ __device__
-    friend bool
-    operator !=(iterator_facade<Derived1,Value1,System1,Traversal1,Reference1,Difference1> const& lhs,
-                iterator_facade<Derived2,Value2,System2,Traversal2,Reference2,Difference2> const& rhs);
-
-    template <typename Derived1, typename Value1, typename System1, typename Traversal1, typename Reference1, typename Difference1,
-              typename Derived2, typename Value2, typename System2, typename Traversal2, typename Reference2, typename Difference2>
-    inline __host__ __device__
-    friend bool
-    operator <(iterator_facade<Derived1,Value1,System1,Traversal1,Reference1,Difference1> const& lhs,
-               iterator_facade<Derived2,Value2,System2,Traversal2,Reference2,Difference2> const& rhs);
-
-    template <typename Derived1, typename Value1, typename System1, typename Traversal1, typename Reference1, typename Difference1,
-              typename Derived2, typename Value2, typename System2, typename Traversal2, typename Reference2, typename Difference2>
-    inline __host__ __device__
-    friend bool
-    operator >(iterator_facade<Derived1,Value1,System1,Traversal1,Reference1,Difference1> const& lhs,
-               iterator_facade<Derived2,Value2,System2,Traversal2,Reference2,Difference2> const& rhs);
-
-    template <typename Derived1, typename Value1, typename System1, typename Traversal1, typename Reference1, typename Difference1,
-              typename Derived2, typename Value2, typename System2, typename Traversal2, typename Reference2, typename Difference2>
-    inline __host__ __device__
-    friend bool
-    operator <=(iterator_facade<Derived1,Value1,System1,Traversal1,Reference1,Difference1> const& lhs,
-                iterator_facade<Derived2,Value2,System2,Traversal2,Reference2,Difference2> const& rhs);
-
-    template <typename Derived1, typename Value1, typename System1, typename Traversal1, typename Reference1, typename Difference1,
-              typename Derived2, typename Value2, typename System2, typename Traversal2, typename Reference2, typename Difference2>
-    inline __host__ __device__
-    friend bool
-    operator >=(iterator_facade<Derived1,Value1,System1,Traversal1,Reference1,Difference1> const& lhs,
-                iterator_facade<Derived2,Value2,System2,Traversal2,Reference2,Difference2> const& rhs);
-
-    // iterator difference is our friend
-    template <typename Derived1, typename Value1, typename System1, typename Traversal1, typename Reference1, typename Difference1,
-              typename Derived2, typename Value2, typename System2, typename Traversal2, typename Reference2, typename Difference2>
-    inline __host__ __device__
-    friend
-      typename thrust::detail::distance_from_result<
-        iterator_facade<Derived1,Value1,System1,Traversal1,Reference1,Difference1>,
-        iterator_facade<Derived2,Value2,System2,Traversal2,Reference2,Difference2>
-      >::type
-    operator-(iterator_facade<Derived1,Value1,System1,Traversal1,Reference1,Difference1> const& lhs,
-              iterator_facade<Derived2,Value2,System2,Traversal2,Reference2,Difference2> const& rhs);
-
-    template<typename Facade>
-    __host__ __device__
-    static typename Facade::reference dereference(Facade const& f)
-    {
-      return f.dereference();
-    }
-
-    template<typename Facade>
-    __host__ __device__
-    static void increment(Facade& f)
-    {
-      f.increment();
-    }
-
-    template<typename Facade>
-    __host__ __device__
-    static void decrement(Facade& f)
-    {
-      f.decrement();
-    }
-
-    template <class Facade1, class Facade2>
-    __host__ __device__
-    static bool equal(Facade1 const& f1, Facade2 const& f2)
-    {
-      return f1.equal(f2);
-    }
-
-    // XXX TODO: Investigate whether we need both of these cases
-    //template <class Facade1, class Facade2>
-    //__host__ __device__
-    //static bool equal(Facade1 const& f1, Facade2 const& f2, mpl::true_)
-    //{
-    //  return f1.equal(f2);
-    //}
-
-    //template <class Facade1, class Facade2>
-    //__host__ __device__
-    //static bool equal(Facade1 const& f1, Facade2 const& f2, mpl::false_)
-    //{
-    //  return f2.equal(f1);
-    //}
-
-    template <class Facade>
-    __host__ __device__
-    static void advance(Facade& f, typename Facade::difference_type n)
-    {
-      f.advance(n);
-    }
-
-    // Facade2 is convertible to Facade1,
-    // so return Facade1's difference_type
-    template <class Facade1, class Facade2>
-    __host__ __device__
-    static typename Facade1::difference_type
-      distance_from(Facade1 const& f1, Facade2 const& f2, thrust::detail::true_type)
-    {
-      return -f1.distance_to(f2);
-    }
-
-    // Facade2 is not convertible to Facade1,
-    // so return Facade2's difference_type
-    template <class Facade1, class Facade2>
-    __host__ __device__
-    static typename Facade2::difference_type
-      distance_from(Facade1 const& f1, Facade2 const& f2, thrust::detail::false_type)
-    {
-      return f2.distance_to(f1);
-    }
-    
-    template <class Facade1, class Facade2>
-    __host__ __device__
-    static typename thrust::detail::distance_from_result<Facade1,Facade2>::type
-      distance_from(Facade1 const& f1, Facade2 const& f2)
-    {
-      // dispatch the implementation of this method upon whether or not
-      // Facade2 is convertible to Facade1
-      return distance_from(f1, f2,
-        typename thrust::detail::is_convertible<Facade2,Facade1>::type());
-    }
-
-    //
-    // Curiously Recurring Template interface.
-    //
-    template <typename Derived, typename Value, typename System, typename Traversal, typename Reference, typename Difference>
-    __host__ __device__
-    static Derived& derived(iterator_facade<Derived,Value,System,Traversal,Reference,Difference>& facade)
-    {
-      return *static_cast<Derived*>(&facade);
-    }
-
-    template <typename Derived, typename Value, typename System, typename Traversal, typename Reference, typename Difference>
-    __host__ __device__
-    static Derived const& derived(iterator_facade<Derived,Value,System,Traversal,Reference,Difference> const& facade)
-    {
-      return *static_cast<Derived const*>(&facade);
-    }
-
-    /*! \endcond
-     */
-}; // end iterator_core_access
-
-
-/*! \p iterator_facade is a template which allows the programmer to define a novel iterator with a standards-conforming interface
- *  which Thrust can use to reason about algorithm acceleration opportunities.
- *
- *  Because most of a standard iterator's interface is defined in terms of a small set of core primitives, \p iterator_facade
- *  defines the non-primitive portion mechanically. In principle a novel iterator could explicitly provide the entire interface in
- *  an ad hoc fashion but doing so might be tedious and prone to subtle errors.
- *
- *  Often \p iterator_facade is too primitive a tool to use for defining novel iterators. In these cases, \p iterator_adaptor
- *  or a specific fancy iterator should be used instead.
- *
- *  \p iterator_facade's functionality is derived from and generally equivalent to \p boost::iterator_facade.
- *  The exception is Thrust's addition of the template parameter \p System, which is necessary to allow Thrust
- *  to dispatch an algorithm to one of several parallel backend systems. An additional exception is Thrust's omission
- *  of the \c operator-> member function.
- *
- *  Interested users may refer to <tt>boost::iterator_facade</tt>'s documentation for usage examples.
- *
- *  \note \p iterator_facade's arithmetic operator free functions exist with the usual meanings but are omitted here for brevity.
- */
-template<typename Derived, typename Value, typename System, typename Traversal, typename Reference, typename Difference>
-  class iterator_facade
-{
-  private:
-    /*! \cond
-     */
-
-    //
-    // Curiously Recurring Template interface.
-    //
-    __host__ __device__
-    Derived& derived()
-    {
-      return *static_cast<Derived*>(this);
-    }
-
-    __host__ __device__
-    Derived const& derived() const
-    {
-      return *static_cast<Derived const*>(this);
-    }
-    /*! \endcond
-     */
-
-  public:
-    /*! The type of element pointed to by \p iterator_facade.
-     */
-    typedef typename thrust::detail::remove_const<Value>::type value_type;
-
-    /*! The return type of \p iterator_facade::operator*().
-     */
-    typedef Reference                                          reference;
-
-    /*! The return type of \p iterator_facade's non-existent \c operator->()
-     *  member function. Unlike \c boost::iterator_facade, \p iterator_facade
-     *  disallows access to the \p value_type's members through expressions of the
-     *  form <tt>iter->member</tt>. \p pointer is defined to \c void to indicate
-     *  that these expressions are not allowed. This limitation may be relaxed in a
-     *  future version of Thrust.
-     */
-    typedef void                                               pointer;
-
-    /*! The type of expressions of the form <tt>x - y</tt> where <tt>x</tt> and <tt>y</tt>
-     *  are of type \p iterator_facade.
-     */
-    typedef Difference                                         difference_type;
-
-    /*! The type of iterator category of \p iterator_facade.
-     */
-    typedef typename thrust::detail::iterator_facade_category<
-      System, Traversal, Value, Reference
-    >::type                                                    iterator_category;
-
-    /*! \p operator*() dereferences this \p iterator_facade.
-     *  \return A reference to the element pointed to by this \p iterator_facade.
-     */
-    __host__ __device__
-    reference operator*() const
-    {
-      return iterator_core_access::dereference(this->derived());
-    }
-
-    // XXX unimplemented for now, consider implementing it later
-    //pointer operator->() const
-    //{
-    //  return;
-    //}
-
-    // XXX investigate whether or not we need to go to the lengths
-    //     boost does to determine the return type
-
-    /*! \p operator[] performs indexed dereference.
-     *  \return A reference to the element \p n distance away from this \p iterator_facade.
-     */
-    __host__ __device__
-    reference operator[](difference_type n) const
-    {
-      return *(this->derived() + n);
-    }
-
-    /*! \p operator++ pre-increments this \p iterator_facade to refer to the element in the next position.
-     *  \return <tt>*this</tt>
-     */
-    __host__ __device__
-    Derived& operator++()
-    {
-      iterator_core_access::increment(this->derived());
-      return this->derived();
-    }
-
-    /*! \p operator++ post-increments this \p iterator_facade and returns a new \p iterator_facade referring to the element in the next position.
-     *  \return A copy of <tt>*this</tt> before increment.
-     */
-    __host__ __device__
-    Derived  operator++(int)
-    {
-      Derived tmp(this->derived());
-      ++*this;
-      return tmp;
-    }
-
-    /*! \p operator-- pre-decrements this \p iterator_facade to refer to the element in the previous position.
-     *  \return <tt>*this</tt>
-     */
-    __host__ __device__
-    Derived& operator--()
-    {
-      iterator_core_access::decrement(this->derived());
-      return this->derived();
-    }
-
-    /*! \p operator-- post-decrements this \p iterator_facade and returns a new \p iterator_facade referring to the element in the previous position.
-     *  \return A copy of <tt>*this</tt> before decrement.
-     */
-    __host__ __device__
-    Derived  operator--(int)
-    {
-      Derived tmp(this->derived());
-      --*this;
-      return tmp;
-    }
-
-    /*! \p operator+= increments this \p iterator_facade to refer to an element a given distance after its current position.
-     *  \param n The quantity to increment.
-     *  \return <tt>*this</tt>
-     */
-    __host__ __device__
-    Derived& operator+=(difference_type n)
-    {
-      iterator_core_access::advance(this->derived(), n);
-      return this->derived();
-    }
-
-    /*! \p operator-= decrements this \p iterator_facade to refer to an element a given distance before its current postition.
-     *  \param n The quantity to decrement.
-     *  \return <tt>*this</tt>
-     */
-    __host__ __device__
-    Derived& operator-=(difference_type n)
-    {
-      iterator_core_access::advance(this->derived(), -n);
-      return this->derived();
-    }
-
-    /*! \p operator- subtracts a given quantity from this \p iterator_facade and returns a new \p iterator_facade referring to the element at the given position before this \p iterator_facade.
-     *  \param n The quantity to decrement
-     *  \return An \p iterator_facade pointing \p n elements before this \p iterator_facade.
-     */
-    __host__ __device__
-    Derived  operator-(difference_type n) const
-    {
-      Derived result(this->derived());
-      return result -= n;
-    }
-}; // end iterator_facade
-
-/*! \cond
- */
-
-// Comparison operators
-template <typename Derived1, typename Value1, typename System1, typename Traversal1, typename Reference1, typename Difference1,
-          typename Derived2, typename Value2, typename System2, typename Traversal2, typename Reference2, typename Difference2>
-inline __host__ __device__
-// XXX it might be nice to implement this at some point
-//typename enable_if_interoperable<Dr1,Dr2,bool>::type // exposition
-bool
-operator ==(iterator_facade<Derived1,Value1,System1,Traversal1,Reference1,Difference1> const& lhs,
-            iterator_facade<Derived2,Value2,System2,Traversal2,Reference2,Difference2> const& rhs)
-{
-  return iterator_core_access
-    ::equal(*static_cast<Derived1 const*>(&lhs),
-            *static_cast<Derived2 const*>(&rhs));
-}
-
-template <typename Derived1, typename Value1, typename System1, typename Traversal1, typename Reference1, typename Difference1,
-          typename Derived2, typename Value2, typename System2, typename Traversal2, typename Reference2, typename Difference2>
-inline __host__ __device__
-// XXX it might be nice to implement this at some point
-//typename enable_if_interoperable<Dr1,Dr2,bool>::type // exposition
-bool
-operator !=(iterator_facade<Derived1,Value1,System1,Traversal1,Reference1,Difference1> const& lhs,
-            iterator_facade<Derived2,Value2,System2,Traversal2,Reference2,Difference2> const& rhs)
-{
-  return !iterator_core_access
-    ::equal(*static_cast<Derived1 const*>(&lhs),
-            *static_cast<Derived2 const*>(&rhs));
-}
-
-template <typename Derived1, typename Value1, typename System1, typename Traversal1, typename Reference1, typename Difference1,
-          typename Derived2, typename Value2, typename System2, typename Traversal2, typename Reference2, typename Difference2>
-inline __host__ __device__
-// XXX it might be nice to implement this at some point
-//typename enable_if_interoperable<Dr1,Dr2,bool>::type // exposition
-bool
-operator <(iterator_facade<Derived1,Value1,System1,Traversal1,Reference1,Difference1> const& lhs,
-           iterator_facade<Derived2,Value2,System2,Traversal2,Reference2,Difference2> const& rhs)
-{
-  return 0 > iterator_core_access
-    ::distance_from(*static_cast<Derived1 const*>(&lhs),
-                    *static_cast<Derived2 const*>(&rhs));
-}
-
-template <typename Derived1, typename Value1, typename System1, typename Traversal1, typename Reference1, typename Difference1,
-          typename Derived2, typename Value2, typename System2, typename Traversal2, typename Reference2, typename Difference2>
-inline __host__ __device__
-// XXX it might be nice to implement this at some point
-//typename enable_if_interoperable<Dr1,Dr2,bool>::type // exposition
-bool
-operator >(iterator_facade<Derived1,Value1,System1,Traversal1,Reference1,Difference1> const& lhs,
-           iterator_facade<Derived2,Value2,System2,Traversal2,Reference2,Difference2> const& rhs)
-{
-  return 0 < iterator_core_access
-    ::distance_from(*static_cast<Derived1 const*>(&lhs),
-                    *static_cast<Derived2 const*>(&rhs));
-}
-
-template <typename Derived1, typename Value1, typename System1, typename Traversal1, typename Reference1, typename Difference1,
-          typename Derived2, typename Value2, typename System2, typename Traversal2, typename Reference2, typename Difference2>
-inline __host__ __device__
-// XXX it might be nice to implement this at some point
-//typename enable_if_interoperable<Dr1,Dr2,bool>::type // exposition
-bool
-operator <=(iterator_facade<Derived1,Value1,System1,Traversal1,Reference1,Difference1> const& lhs,
-            iterator_facade<Derived2,Value2,System2,Traversal2,Reference2,Difference2> const& rhs)
-{
-  return 0 >= iterator_core_access
-    ::distance_from(*static_cast<Derived1 const*>(&lhs),
-                    *static_cast<Derived2 const*>(&rhs));
-}
-
-template <typename Derived1, typename Value1, typename System1, typename Traversal1, typename Reference1, typename Difference1,
-          typename Derived2, typename Value2, typename System2, typename Traversal2, typename Reference2, typename Difference2>
-inline __host__ __device__
-// XXX it might be nice to implement this at some point
-//typename enable_if_interoperable<Dr1,Dr2,bool>::type // exposition
-bool
-operator >=(iterator_facade<Derived1,Value1,System1,Traversal1,Reference1,Difference1> const& lhs,
-            iterator_facade<Derived2,Value2,System2,Traversal2,Reference2,Difference2> const& rhs)
-{
-  return 0 <= iterator_core_access
-    ::distance_from(*static_cast<Derived1 const*>(&lhs),
-                    *static_cast<Derived2 const*>(&rhs));
-}
-
-// Iterator difference
-template <typename Derived1, typename Value1, typename System1, typename Traversal1, typename Reference1, typename Difference1,
-          typename Derived2, typename Value2, typename System2, typename Traversal2, typename Reference2, typename Difference2>
-inline __host__ __device__
-
-// divine the type this operator returns
-typename thrust::detail::distance_from_result<
-  iterator_facade<Derived1,Value1,System1,Traversal1,Reference1,Difference1>,
-  iterator_facade<Derived2,Value2,System2,Traversal2,Reference2,Difference2>
->::type
-
-operator-(iterator_facade<Derived1,Value1,System1,Traversal1,Reference1,Difference1> const& lhs,
-          iterator_facade<Derived2,Value2,System2,Traversal2,Reference2,Difference2> const& rhs)
-{
-  return iterator_core_access
-    ::distance_from(*static_cast<Derived1 const*>(&lhs),
-                    *static_cast<Derived2 const*>(&rhs));
-}
-
-// Iterator addition
-template <typename Derived, typename Value, typename System, typename Traversal, typename Reference, typename Difference>
-inline __host__ __device__
-Derived operator+ (iterator_facade<Derived,Value,System,Traversal,Reference,Difference> const& i,
-                   typename Derived::difference_type n)
-{
-  Derived tmp(static_cast<Derived const&>(i));
-  return tmp += n;
-}
-
-template <typename Derived, typename Value, typename System, typename Traversal, typename Reference, typename Difference>
-inline __host__ __device__
-Derived operator+ (typename Derived::difference_type n,
-                   iterator_facade<Derived,Value,System,Traversal,Reference,Difference> const& i)
-{
-  Derived tmp(static_cast<Derived const&>(i));
-  return tmp += n;
-}
-
-/*! \endcond
- */
-
-/*! \} // end fancyiterators
- */
-
-/*! \} // end iterators
- */
-
-} // end thrust
-
diff --git a/compat/thrust/iterator/iterator_traits.h b/compat/thrust/iterator/iterator_traits.h
deleted file mode 100644
index a16f219b07..0000000000
--- a/compat/thrust/iterator/iterator_traits.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file thrust/iterator/iterator_traits.h
- *  \brief Traits and metafunctions for reasoning about the traits of iterators
- */
-
-/*
- * (C) Copyright David Abrahams 2003.
- * 
- * Distributed under the Boost Software License, Version 1.0.
- * (See accompanying NOTICE file for the complete license)
- *
- * For more information, see http://www.boost.org
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <iterator>
-
-namespace thrust
-{
-
-/*! \p iterator_traits is a type trait class that provides a uniform
- *  interface for querying the properties of iterators at compile-time.
- */
-template<typename T>
-  struct iterator_traits
-    : public std::iterator_traits<T>
-{
-}; // end iterator_traits
-
-
-template<typename Iterator> struct iterator_value;
-
-template<typename Iterator> struct iterator_pointer;
-
-template<typename Iterator> struct iterator_reference;
-
-template<typename Iterator> struct iterator_difference;
-
-template<typename Iterator> struct iterator_traversal;
-
-template<typename Iterator> struct iterator_system;
-
-// TODO remove this in Thrust v1.7.0
-template<typename Iterator>
-  struct THRUST_DEPRECATED iterator_space
-{
-  typedef THRUST_DEPRECATED typename iterator_system<Iterator>::type type;
-};
-
-
-} // end thrust
-
-#include <thrust/iterator/detail/iterator_traversal_tags.h>
-#include <thrust/iterator/detail/host_system_tag.h>
-#include <thrust/iterator/detail/device_system_tag.h>
-#include <thrust/iterator/detail/any_system_tag.h>
-#include <thrust/iterator/detail/iterator_traits.inl>
-
diff --git a/compat/thrust/iterator/permutation_iterator.h b/compat/thrust/iterator/permutation_iterator.h
deleted file mode 100644
index 509097b347..0000000000
--- a/compat/thrust/iterator/permutation_iterator.h
+++ /dev/null
@@ -1,210 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-/*! \file thrust/iterator/permutation_iterator.h
- *  \brief An iterator which performs a gather or scatter operation when dereferenced
- */
-
-/*
- * (C) Copyright Toon Knapen    2001.
- * (C) Copyright David Abrahams 2003.
- * (C) Copyright Roland Richter 2003.
- * 
- * Distributed under the Boost Software License, Version 1.0.
- * (See accompanying NOTICE file for the complete license)
- *
- * For more information, see http://www.boost.org
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/iterator/detail/permutation_iterator_base.h>
-#include <thrust/iterator/iterator_facade.h>
-#include <thrust/iterator/iterator_traits.h>
-
-namespace thrust
-{
-
-
-/*! \addtogroup iterators
- *  \{
- */
-
-/*! \addtogroup fancyiterator Fancy Iterators
- *  \ingroup iterators
- *  \{
- */
-
-/*! \p permutation_iterator is an iterator which represents a pointer into a
- *  reordered view of a given range. \p permutation_iterator is an imprecise name;
- *  the reordered view need not be a strict permutation. This iterator is useful
- *  for fusing a scatter or gather operation with other algorithms.
- *
- *  This iterator takes two arguments:
- *
- *    - an iterator to the range \c V on which the "permutation" will be applied
- *    - the reindexing scheme that defines how the elements of \c V will be permuted.
- *
- *  Note that \p permutation_iterator is not limited to strict permutations of the
- *  given range \c V. The distance between begin and end of the reindexing iterators
- *  is allowed to be smaller than the size of the range \c V, in which case
- *  the \p permutation_iterator only provides a "permutation" of a subrange of \c V.
- *  Nor do the indices need to be unique. In this same context, it must be noted
- *  that the past-the-end \p permutation_iterator is completely defined by means of
- *  the past-the-end iterator to the indices.
- *
- *  The following code snippet demonstrates how to create a \p permutation_iterator
- *  which represents a reordering of the contents of a \p device_vector.
- *
- *  \code
- *  #include <thrust/iterator/permutation_iterator.h>
- *  #include <thrust/device_vector.h>
- *  ...
- *  thrust::device_vector<float> values(8);
- *  values[0] = 10.0f;
- *  values[1] = 20.0f;
- *  values[2] = 30.0f;
- *  values[3] = 40.0f;
- *  values[4] = 50.0f;
- *  values[5] = 60.0f;
- *  values[6] = 70.0f;
- *  values[7] = 80.0f;
- *
- *  thrust::device_vector<int> indices(4);
- *  indices[0] = 2;
- *  indices[1] = 6;
- *  indices[2] = 1;
- *  indices[3] = 3;
- *
- *  typedef thrust::device_vector<float>::iterator ElementIterator;
- *  typedef thrust::device_vector<int>::iterator   IndexIterator;
- *
- *  thrust::permutation_iterator<ElementIterator,IndexIterator> iter(values.begin(), indices.begin());
- *
- *  *iter;   // returns 30.0f;
- *  iter[0]; // returns 30.0f;
- *  iter[1]; // returns 70.0f;
- *  iter[2]; // returns 20.0f;
- *  iter[3]; // returns 40.0f;
- *
- *  // iter[4] is an out-of-bounds error
- *
- *  *iter   = -1.0f; // sets values[2] to -1.0f;
- *  iter[0] = -1.0f; // sets values[2] to -1.0f;
- *  iter[1] = -1.0f; // sets values[6] to -1.0f;
- *  iter[2] = -1.0f; // sets values[1] to -1.0f;
- *  iter[3] = -1.0f; // sets values[3] to -1.0f;
- *
- *  // values is now {10, -1, -1, -1, 50, 60, -1, 80}
- *  \endcode
- *
- *  \see make_permutation_iterator
- */
-template <typename ElementIterator,
-          typename IndexIterator>
-  class permutation_iterator
-    : public thrust::detail::permutation_iterator_base<
-        ElementIterator,
-        IndexIterator
-      >::type
-{
-  /*! \cond
-   */
-  private:
-    typedef typename detail::permutation_iterator_base<ElementIterator,IndexIterator>::type super_t;
-
-    friend class thrust::iterator_core_access;
-  /*! \endcond
-   */
-
-  public:
-    /*! Null constructor calls the null constructor of this \p permutation_iterator's
-     *  element iterator.
-     */
-    __host__ __device__
-    permutation_iterator()
-      : m_element_iterator() {}
-
-    /*! Constructor accepts an \c ElementIterator into a range of values and an
-     *  \c IndexIterator into a range of indices defining the indexing scheme on the
-     *  values.
-     *
-     *  \param x An \c ElementIterator pointing to this \p permutation_iterator's range of values.
-     *  \param y An \c IndexIterator pointing to an indexing scheme to use on \p x.
-     */
-    __host__ __device__
-    explicit permutation_iterator(ElementIterator x, IndexIterator y)
-      : super_t(y), m_element_iterator(x) {}
-
-    /*! Copy constructor accepts a related \p permutation_iterator.
-     *  \param r A compatible \p permutation_iterator to copy from.
-     */
-    template<typename OtherElementIterator, typename OtherIndexIterator>
-    __host__ __device__
-    permutation_iterator(permutation_iterator<OtherElementIterator,OtherIndexIterator> const &r
-    // XXX remove these guards when we have static_assert
-    , typename detail::enable_if_convertible<OtherElementIterator, ElementIterator>::type* = 0
-    , typename detail::enable_if_convertible<OtherIndexIterator, IndexIterator>::type* = 0
-    )
-      : super_t(r.base()), m_element_iterator(r.m_element_iterator)
-    {}
-
-  /*! \cond
-   */
-  private:
-    __thrust_hd_warning_disable__
-    __host__ __device__
-    typename super_t::reference dereference() const
-    {
-      return *(m_element_iterator + *this->base());
-    }
-
-    // make friends for the copy constructor
-    template<typename,typename> friend class permutation_iterator;
-
-    ElementIterator m_element_iterator;
-  /*! \endcond
-   */
-}; // end permutation_iterator
-
-
-/*! \p make_permutation_iterator creates a \p permutation_iterator
- *  from an \c ElementIterator pointing to a range of elements to "permute"
- *  and an \c IndexIterator pointing to a range of indices defining an indexing
- *  scheme on the values.
- *
- *  \param e An \c ElementIterator pointing to a range of values.
- *  \param i An \c IndexIterator pointing to an indexing scheme to use on \p e.
- *  \return A new \p permutation_iterator which permutes the range \p e by \p i.
- *  \see permutation_iterator
- */
-template<typename ElementIterator, typename IndexIterator>
-__host__ __device__
-permutation_iterator<ElementIterator,IndexIterator> make_permutation_iterator(ElementIterator e, IndexIterator i)
-{
-  return permutation_iterator<ElementIterator,IndexIterator>(e,i);
-}
-
-/*! \} // end fancyiterators
- */
-
-/*! \} // end iterators
- */
-
-} // end thrust
-
diff --git a/compat/thrust/iterator/retag.h b/compat/thrust/iterator/retag.h
deleted file mode 100644
index 660da8f2fd..0000000000
--- a/compat/thrust/iterator/retag.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-/*! \file thrust/iterator/retag.h
- *  \brief Functionality for altering an iterator's associated system.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/iterator/detail/retag.h>
-
-namespace thrust
-{
-
-
-/*! \ingroup iterator_tags
- *  \{
- */
-
-#if 0
-/*! \p reinterpret_tag returns a copy of an iterator and changes the type of the result's system tag.
- *  \tparam Tag Any system tag.
- *  \tparam Iterator Any iterator type.
- *  \param iter The iterator of interest.
- *  \return An iterator of unspecified type whose system tag is \p Tag and whose behavior is otherwise
- *          equivalent to \p iter.
- *  \note Unlike \p retag, \p reinterpret_tag does not enforce that the converted-to system tag be
- *        related to the converted-from system tag.
- *  \see retag
- */
-template<typename Tag, typename Iterator>
-unspecified_iterator_type reinterpret_tag(Iterator iter);
-
-/*! \p retag returns a copy of an iterator and changes the type of the result's system tag.
- *  \tparam Tag \p Tag shall be convertible to <tt>thrust::iterator_system<Iterator>::type</tt>,
- *              or <tt>thrust::iterator_system<Iterator>::type</tt> is a base type of \p Tag.
- *  \tparam Iterator Any iterator type.
- *  \param iter The iterator of interest.
- *  \return An iterator of unspecified type whose system tag is \p Tag and whose behavior is
- *          otherwise equivalent to \p iter.
- *  \note Unlike \p reinterpret_tag, \p retag enforces that the converted-to system tag be
- *        related to the converted-from system tag.
- *  \see reinterpret_tag
- */
-template<typename Tag, typename Iterator>
-unspecified_iterator_type retag(Iterator iter);
-#endif
-
-/*! \} // iterator_tags
- */
-
-
-} // end thrust
-
diff --git a/compat/thrust/iterator/reverse_iterator.h b/compat/thrust/iterator/reverse_iterator.h
deleted file mode 100644
index 03f03396d3..0000000000
--- a/compat/thrust/iterator/reverse_iterator.h
+++ /dev/null
@@ -1,238 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file thrust/iterator/reverse_iterator.h
- *  \brief An iterator adaptor which adapts another iterator to traverse backwards
- */
-
-/*
- * (C) Copyright David Abrahams 2002.
- * (C) Copyright Jeremy Siek    2002.
- * (C) Copyright Thomas Witt    2002.
- * 
- * Distributed under the Boost Software License, Version 1.0.
- * (See accompanying NOTICE file for the complete license)
- *
- * For more information, see http://www.boost.org
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/iterator/detail/reverse_iterator_base.h>
-#include <thrust/iterator/iterator_facade.h>
-
-namespace thrust
-{
-
-/*! \addtogroup iterators
- *  \{
- */
-
-/*! \addtogroup fancyiterator Fancy Iterators
- *  \ingroup iterators
- *  \{
- */
-
-/*! \p reverse_iterator is an iterator which represents a pointer into a
- *  reversed view of a given range. In this way, \p reverse_iterator allows
- *  backwards iteration through a bidirectional input range.
- *
- *  It is important to note that although \p reverse_iterator is constructed
- *  from a given iterator, it points to the element preceding it. In this way,
- *  the past-the-end \p reverse_iterator of a given range points to the element
- *  preceding the first element of the input range. By the same token, the first
- *  \p reverse_iterator of a given range is constructed from a past-the-end iterator
- *  of the original range yet points to the last element of the input.
- *
- *  The following code snippet demonstrates how to create a \p reverse_iterator
- *  which represents a reversed view of the contents of a \p device_vector.
- *
- *  \code
- *  #include <thrust/iterator/reverse_iterator.h>
- *  #include <thrust/device_vector.h>
- *  ...
- *  thrust::device_vector<float> v(4);
- *  v[0] = 0.0f;
- *  v[1] = 1.0f;
- *  v[2] = 2.0f;
- *  v[3] = 3.0f;
- *
- *  typedef thrust::device_vector<float>::iterator Iterator;
- *
- *  // note that we point the iterator to the *end* of the device_vector
- *  thrust::reverse_iterator<Iterator> iter(v.end());
- *
- *  *iter;   // returns 3.0f;
- *  iter[0]; // returns 3.0f;
- *  iter[1]; // returns 2.0f;
- *  iter[2]; // returns 1.0f;
- *  iter[3]; // returns 0.0f;
- *
- *  // iter[4] is an out-of-bounds error
- *  \endcode
- *
- *  Since reversing a range is a common operation, containers like \p device_vector
- *  have nested typedefs for declaration shorthand and methods for constructing
- *  reverse_iterators. The following code snippet is equivalent to the previous:
- *
- *  \code
- *  #include <thrust/device_vector.h>
- *  ...
- *  thrust::device_vector<float> v(4);
- *  v[0] = 0.0f;
- *  v[1] = 1.0f;
- *  v[2] = 2.0f;
- *  v[3] = 3.0f;
- *
- *  // we use the nested type reverse_iterator to refer to a reversed view of
- *  // a device_vector and the method rbegin() to create a reverse_iterator pointing
- *  // to the beginning of the reversed device_vector
- *  thrust::device_vector<float>::reverse_iterator iter = v.rbegin();
- *
- *  *iter;   // returns 3.0f;
- *  iter[0]; // returns 3.0f;
- *  iter[1]; // returns 2.0f;
- *  iter[2]; // returns 1.0f;
- *  iter[3]; // returns 0.0f;
- *
- *  // iter[4] is an out-of-bounds error
- *
- *  // similarly, rend() points to the end of the reversed sequence:
- *  assert(v.rend() == (iter + 4));
- *  \endcode
- *
- *  Finally, the following code snippet demonstrates how to use reverse_iterator to
- *  perform a reversed prefix sum operation on the contents of a device_vector:
- *
- *  \code
- *  #include <thrust/device_vector.h>
- *  #include <thrust/scan.h>
- *  ...
- *  thrust::device_vector<int> v(5);
- *  v[0] = 0;
- *  v[1] = 1;
- *  v[2] = 2;
- *  v[3] = 3;
- *  v[4] = 4;
- *
- *  thrust::device_vector<int> result(5);
- *
- *  // exclusive scan v into result in reverse
- *  thrust::exclusive_scan(v.rbegin(), v.rend(), result.begin());
- *
- *  // result is now {0, 4, 7, 9, 10}
- *  \endcode
- *
- *  \see make_reverse_iterator
- */
-template<typename BidirectionalIterator>
-  class reverse_iterator
-    : public detail::reverse_iterator_base<BidirectionalIterator>::type
-{
-  /*! \cond
-   */
-  private:
-    typedef typename thrust::detail::reverse_iterator_base<
-      BidirectionalIterator
-    >::type super_t;
-
-    friend class thrust::iterator_core_access;
-  /*! \endcond
-   */
-
-  public:
-    /*! Default constructor does nothing.
-     */
-    __host__ __device__
-    reverse_iterator(void) {}
-
-    /*! \p Constructor accepts a \c BidirectionalIterator pointing to a range
-     *  for this \p reverse_iterator to reverse.
-     *
-     *  \param x A \c BidirectionalIterator pointing to a range to reverse.
-     */
-    __host__ __device__
-    explicit reverse_iterator(BidirectionalIterator x);
-
-    /*! \p Copy constructor allows construction from a related compatible
-     *  \p reverse_iterator.
-     *
-     *  \param r A \p reverse_iterator to copy from.
-     */
-    template<typename OtherBidirectionalIterator>
-    __host__ __device__
-    reverse_iterator(reverse_iterator<OtherBidirectionalIterator> const &r
-// XXX msvc screws this up
-// XXX remove these guards when we have static_assert
-#ifndef _MSC_VER
-                     , typename thrust::detail::enable_if<
-                         thrust::detail::is_convertible<
-                           OtherBidirectionalIterator,
-                           BidirectionalIterator
-                         >::value
-                       >::type * = 0
-#endif // _MSC_VER
-                     );
-
-  /*! \cond
-   */
-  private:
-    __thrust_hd_warning_disable__
-    __host__ __device__
-    typename super_t::reference dereference(void) const;
-
-    __host__ __device__
-    void increment(void);
-
-    __host__ __device__
-    void decrement(void);
-
-    __host__ __device__
-    void advance(typename super_t::difference_type n);
-
-    template<typename OtherBidirectionalIterator>
-    __host__ __device__
-    typename super_t::difference_type
-    distance_to(reverse_iterator<OtherBidirectionalIterator> const &y) const;
-  /*! \endcond
-   */
-}; // end reverse_iterator
-
-
-/*! \p make_reverse_iterator creates a \p reverse_iterator
- *  from a \c BidirectionalIterator pointing to a range of elements to reverse.
- *  
- *  \param x A \c BidirectionalIterator pointing to a range to reverse.
- *  \return A new \p reverse_iterator which reverses the range \p x.
- */
-template<typename BidirectionalIterator>
-__host__ __device__
-reverse_iterator<BidirectionalIterator> make_reverse_iterator(BidirectionalIterator x);
-
-
-/*! \} // end fancyiterators
- */
-
-/*! \} // end iterators
- */
-
-} // end thrust
-
-#include <thrust/iterator/detail/reverse_iterator.inl>
-
diff --git a/compat/thrust/iterator/transform_iterator.h b/compat/thrust/iterator/transform_iterator.h
deleted file mode 100644
index 985b61b775..0000000000
--- a/compat/thrust/iterator/transform_iterator.h
+++ /dev/null
@@ -1,344 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file thrust/iterator/transform_iterator.h
- *  \brief An iterator which adapts another iterator by applying a function to the result of its dereference 
- */
-
-/*
- * (C) Copyright David Abrahams 2002.
- * (C) Copyright Jeremy Siek    2002.
- * (C) Copyright Thomas Witt    2002.
- * 
- * Distributed under the Boost Software License, Version 1.0.
- * (See accompanying NOTICE file for the complete license)
- *
- * For more information, see http://www.boost.org
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// #include the details first
-#include <thrust/iterator/detail/transform_iterator.inl>
-#include <thrust/iterator/iterator_facade.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/detail/function.h>
-
-namespace thrust
-{
-
-/*! \addtogroup iterators
- *  \{
- */
-
-/*! \addtogroup fancyiterator Fancy Iterators
- *  \ingroup iterators
- *  \{
- */
-
-/*! \p transform_iterator is an iterator which represents a pointer into a range
- *  of values after transformation by a function. This iterator is useful for 
- *  creating a range filled with the result of applying an operation to another range
- *  without either explicitly storing it in memory, or explicitly executing the transformation.
- *  Using \p transform_iterator facilitates kernel fusion by deferring the execution
- *  of a transformation until the value is needed while saving both memory capacity
- *  and bandwidth.
- *
- *  The following code snippet demonstrates how to create a \p transform_iterator
- *  which represents the result of \c sqrtf applied to the contents of a \p device_vector.
- *
- *  \code
- *  #include <thrust/iterator/transform_iterator.h>
- *  #include <thrust/device_vector.h>
- *  
- *  // note: functor inherits from unary_function
- *  struct square_root : public thrust::unary_function<float,float>
- *  {
- *    __host__ __device__
- *    float operator()(float x) const
- *    {
- *      return sqrtf(x);
- *    }
- *  };
- *  
- *  int main(void)
- *  {
- *    thrust::device_vector<float> v(4);
- *    v[0] = 1.0f;
- *    v[1] = 4.0f;
- *    v[2] = 9.0f;
- *    v[3] = 16.0f;
- *                                                                                           
- *    typedef thrust::device_vector<float>::iterator FloatIterator;
- *                                                                                           
- *    thrust::transform_iterator<square_root, FloatIterator> iter(v.begin(), square_root());
- *                                                                                           
- *    *iter;   // returns 1.0f
- *    iter[0]; // returns 1.0f;
- *    iter[1]; // returns 2.0f;
- *    iter[2]; // returns 3.0f;
- *    iter[3]; // returns 4.0f;
- *                                                                                           
- *    // iter[4] is an out-of-bounds error
- *  }
- *  \endcode
- *
- *  This next example demonstrates how to use a \p transform_iterator with the
- *  \p thrust::reduce function to compute the sum of squares of a sequence.
- *  We will create temporary \p transform_iterators with the
- *  \p make_transform_iterator function in order to avoid explicitly specifying their type:
- *
- *  \code
- *  #include <thrust/iterator/transform_iterator.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/reduce.h>
- *  #include <iostream>
- *  
- *  // note: functor inherits from unary_function
- *  struct square : public thrust::unary_function<float,float>
- *  {
- *    __host__ __device__
- *    float operator()(float x) const
- *    {
- *      return x * x;
- *    }
- *  };
- *  
- *  int main(void)
- *  {
- *    // initialize a device array
- *    thrust::device_vector<float> v(4);
- *    v[0] = 1.0f;
- *    v[1] = 2.0f;
- *    v[2] = 3.0f;
- *    v[3] = 4.0f;
- *  
- *    float sum_of_squares =
- *     thrust::reduce(thrust::make_transform_iterator(v.begin(), square()),
- *                    thrust::make_transform_iterator(v.end(),   square()));
- *  
- *    std::cout << "sum of squares: " << sum_of_squares << std::endl;
- *    return 0;
- *  }
- *  \endcode
- *
- *  Note that in the previous two examples the transform functor (namely \c square_root 
- *  and \c square) inherits from \c thrust::unary_function.  Inheriting from 
- *  \c thrust::unary_function ensures that a functor is a valid \c AdaptableUnaryFunction
- *  and provides all the necessary \c typedef declarations.  The \p transform_iterator
- *  can also be applied to a \c UnaryFunction that does not inherit from 
- *  \c thrust::unary_function using an optional template argument.  The following example
- *  illustrates how to use the third template argument to specify the \c result_type of
- *  the function.   
- *
- *  \code
- *  #include <thrust/iterator/transform_iterator.h>
- *  #include <thrust/device_vector.h>
- *  
- *  // note: functor *does not* inherit from unary_function
- *  struct square_root
- *  {
- *    __host__ __device__
- *    float operator()(float x) const
- *    {
- *      return sqrtf(x);
- *    }
- *  };
- *  
- *  int main(void)
- *  {
- *    thrust::device_vector<float> v(4);
- *    v[0] = 1.0f;
- *    v[1] = 4.0f;
- *    v[2] = 9.0f;
- *    v[3] = 16.0f;
- *                                                                                           
- *    typedef thrust::device_vector<float>::iterator FloatIterator;
- *    
- *    // note: float result_type is specified explicitly
- *    thrust::transform_iterator<square_root, FloatIterator, float> iter(v.begin(), square_root());
- *                                                                                           
- *    *iter;   // returns 1.0f
- *    iter[0]; // returns 1.0f;
- *    iter[1]; // returns 2.0f;
- *    iter[2]; // returns 3.0f;
- *    iter[3]; // returns 4.0f;
- *                                                                                           
- *    // iter[4] is an out-of-bounds error
- *  }
- *  \endcode
- *
- *  \see make_transform_iterator
- */
-template <class AdaptableUnaryFunction, class Iterator, class Reference = use_default, class Value = use_default>
-  class transform_iterator
-    : public detail::transform_iterator_base<AdaptableUnaryFunction, Iterator, Reference, Value>::type
-{
-  /*! \cond
-   */
-  public:
-    typedef typename
-    detail::transform_iterator_base<AdaptableUnaryFunction, Iterator, Reference, Value>::type
-    super_t;
-
-    friend class thrust::iterator_core_access;
-  /*! \endcond
-   */
-
-  public:
-    /*! Null constructor does nothing.
-     */
-    __host__ __device__
-    transform_iterator() {}
-  
-    /*! This constructor takes as arguments an \c Iterator and an \c AdaptableUnaryFunction
-     *  and copies them to a new \p transform_iterator.
-     *
-     *  \param x An \c Iterator pointing to the input to this \p transform_iterator's \c AdaptableUnaryFunction.
-     *  \param f An \c AdaptableUnaryFunction used to transform the objects pointed to by \p x.
-     */
-    __host__ __device__
-    transform_iterator(Iterator const& x, AdaptableUnaryFunction f)
-      : super_t(x), m_f(f) {
-    }
-  
-    /*! This explicit constructor copies the value of a given \c Iterator and creates
-     *  this \p transform_iterator's \c AdaptableUnaryFunction using its null constructor.
-     *
-     *  \param x An \c Iterator to copy.
-     */
-    __host__ __device__
-    explicit transform_iterator(Iterator const& x)
-      : super_t(x) { }
-
-    /*! This copy constructor creates a new \p transform_iterator from another
-     *  \p transform_iterator.
-     *
-     *  \param other The \p transform_iterator to copy.
-     */
-    template<typename OtherAdaptableUnaryFunction,
-             typename OtherIterator,
-             typename OtherReference,
-             typename OtherValue>
-    __host__ __device__
-    transform_iterator(const transform_iterator<OtherAdaptableUnaryFunction, OtherIterator, OtherReference, OtherValue> &other,
-                       typename thrust::detail::enable_if_convertible<OtherIterator, Iterator>::type* = 0,
-                       typename thrust::detail::enable_if_convertible<OtherAdaptableUnaryFunction, AdaptableUnaryFunction>::type* = 0)
-      : super_t(other.base()), m_f(other.functor()) {}
-
-    /*! Copy assignment operator copies from another \p transform_iterator.
-     *  \param other The other \p transform_iterator to copy.
-     *  \return <tt>*this</tt>
-     *
-     *  \note If the type of this \p transform_iterator's functor is not copy assignable
-     *        (for example, if it is a lambda) it is not an error to call this function.
-     *        In this case, however, the functor will not be modified.
-     *
-     *        In any case, this \p transform_iterator's underlying iterator will be copy assigned.
-     */
-    __host__ __device__
-    transform_iterator &operator=(const transform_iterator &other)
-    {
-      return do_assign(other,
-      // XXX gcc 4.2.1 crashes on is_copy_assignable; just assume the functor is assignable as a WAR
-#if (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_GCC) && (THRUST_GCC_VERSION <= 40201)
-          thrust::detail::true_type()
-#else
-          typename thrust::detail::is_copy_assignable<AdaptableUnaryFunction>::type()
-#endif // THRUST_HOST_COMPILER
-      );
-    }
-
-    /*! This method returns a copy of this \p transform_iterator's \c AdaptableUnaryFunction.
-     *  \return A copy of this \p transform_iterator's \c AdaptableUnaryFunction.
-     */
-    __host__ __device__
-    AdaptableUnaryFunction functor() const
-      { return m_f; }
-
-    /*! \cond
-     */
-  private:
-    __host__ __device__
-    transform_iterator &do_assign(const transform_iterator &other, thrust::detail::true_type)
-    {
-      super_t::operator=(other);
-
-      // do assign to m_f
-      m_f = other.functor();
-
-      return *this;
-    }
-
-    __host__ __device__
-    transform_iterator &do_assign(const transform_iterator &other, thrust::detail::false_type)
-    {
-      super_t::operator=(other);
-
-      // don't assign to m_f
-
-      return *this;
-    }
-
-    __thrust_hd_warning_disable__
-    __host__ __device__
-    typename super_t::reference dereference() const
-    { 
-      // XXX consider making this a member instead of a temporary created inside dereference
-      thrust::detail::host_device_function<AdaptableUnaryFunction, typename super_t::reference> wrapped_f(m_f);
-
-      return wrapped_f(*this->base());
-    }
-
-    // tag this as mutable per Dave Abrahams in this thread:
-    // http://lists.boost.org/Archives/boost/2004/05/65332.php
-    mutable AdaptableUnaryFunction m_f;
-
-    /*! \endcond
-     */
-}; // end transform_iterator
-
-
-/*! \p make_transform_iterator creates a \p transform_iterator
- *  from an \c Iterator and \c AdaptableUnaryFunction.
- *
- *  \param it The \c Iterator pointing to the input range of the
- *            newly created \p transform_iterator.
- *  \param fun The \c AdaptableUnaryFunction used to transform the range pointed
- *             to by \p it in the newly created \p transform_iterator.
- *  \return A new \p transform_iterator which transforms the range at
- *          \p it by \p fun.
- *  \see transform_iterator
- */
-template <class AdaptableUnaryFunction, class Iterator>
-inline __host__ __device__
-transform_iterator<AdaptableUnaryFunction, Iterator>
-make_transform_iterator(Iterator it, AdaptableUnaryFunction fun)
-{
-  return transform_iterator<AdaptableUnaryFunction, Iterator>(it, fun);
-} // end make_transform_iterator
-
-/*! \} // end fancyiterators
- */
-
-/*! \} // end iterators
- */
-
-} // end thrust
-
diff --git a/compat/thrust/iterator/zip_iterator.h b/compat/thrust/iterator/zip_iterator.h
deleted file mode 100644
index 8e7299c407..0000000000
--- a/compat/thrust/iterator/zip_iterator.h
+++ /dev/null
@@ -1,245 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file thrust/iterator/zip_iterator.h
- *  \brief An iterator which returns a tuple of the result of dereferencing
- *         a tuple of iterators when dereferenced
- */
-
-/*
- * Copyright David Abrahams and Thomas Becker 2000-2006.
- * 
- * Distributed under the Boost Software License, Version 1.0.
- * (See accompanying NOTICE file for the complete license)
- *
- * For more information, see http://www.boost.org
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/iterator/detail/zip_iterator_base.h>
-#include <thrust/iterator/iterator_facade.h>
-#include <thrust/detail/type_traits.h>
-
-namespace thrust
-{
-
-/*! \addtogroup iterators
- *  \{
- */
-
-/*! \addtogroup fancyiterator Fancy Iterators
- *  \ingroup iterators
- *  \{
- */
-
-/*! \p zip_iterator is an iterator which represents a pointer into a range
- *  of \p tuples whose elements are themselves taken from a \p tuple of input
- *  iterators. This iterator is useful for creating a virtual array of structures
- *  while achieving the same performance and bandwidth as the structure of arrays
- *  idiom. \p zip_iterator also facilitates kernel fusion by providing a convenient
- *  means of amortizing the execution of the same operation over multiple ranges.
- *
- *  The following code snippet demonstrates how to create a \p zip_iterator
- *  which represents the result of "zipping" multiple ranges together.
- *  
- *  \code
- *  #include <thrust/iterator/zip_iterator.h>
- *  #include <thrust/tuple.h>
- *  #include <thrust/device_vector.h>
- *  ...
- *  thrust::device_vector<int> int_v(3);
- *  int_v[0] = 0; int_v[1] = 1; int_v[2] = 2;
- *
- *  thrust::device_vector<float> float_v(3);
- *  float_v[0] = 0.0f; float_v[1] = 1.0f; float_v[2] = 2.0f;
- *
- *  thrust::device_vector<char> char_v(3);
- *  char_v[0] = 'a'; char_v[1] = 'b'; char_v[2] = 'c';
- *
- *  // typedef these iterators for shorthand
- *  typedef thrust::device_vector<int>::iterator   IntIterator;
- *  typedef thrust::device_vector<float>::iterator FloatIterator;
- *  typedef thrust::device_vector<char>::iterator  CharIterator;
- *
- *  // typedef a tuple of these iterators
- *  typedef thrust::tuple<IntIterator, FloatIterator, CharIterator> IteratorTuple;
- *
- *  // typedef the zip_iterator of this tuple
- *  typedef thrust::zip_iterator<IteratorTuple> ZipIterator;
- *
- *  // finally, create the zip_iterator
- *  ZipIterator iter(thrust::make_tuple(int_v.begin(), float_v.begin(), char_v.begin()));
- *
- *  *iter;   // returns (0, 0.0f, 'a')
- *  iter[0]; // returns (0, 0.0f, 'a')
- *  iter[1]; // returns (1, 1.0f, 'b')
- *  iter[2]; // returns (2, 2.0f, 'c')
- *
- *  thrust::get<0>(iter[2]); // returns 2
- *  thrust::get<1>(iter[0]); // returns 0.0f
- *  thrust::get<2>(iter[1]); // returns 'b'
- *
- *  // iter[3] is an out-of-bounds error
- *  \endcode
- *
- *  Defining the type of a \p zip_iterator can be complex. The next code example demonstrates
- *  how to use the \p make_zip_iterator function with the \p make_tuple function to avoid
- *  explicitly specifying the type of the \p zip_iterator. This example shows how to use
- *  \p zip_iterator to copy multiple ranges with a single call to \p thrust::copy.
- *
- *  \code
- *  #include <thrust/iterator/zip_iterator.h>
- *  #include <thrust/tuple.h>
- *  #include <thrust/device_vector.h>
- *
- *  int main(void)
- *  {
- *    thrust::device_vector<int> int_in(3), int_out(3);
- *    int_in[0] = 0;
- *    int_in[1] = 1;
- *    int_in[2] = 2;
- *
- *    thrust::device_vector<float> float_in(3), float_out(3);
- *    float_in[0] =  0.0f;
- *    float_in[1] = 10.0f;
- *    float_in[2] = 20.0f;
- *
- *    thrust::copy(thrust::make_zip_iterator(thrust::make_tuple(int_in.begin(), float_in.begin())),
- *                 thrust::make_zip_iterator(thrust::make_tuple(int_in.end(),   float_in.end())),
- *                 thrust::make_zip_iterator(thrust::make_tuple(int_out.begin(),float_out.begin())));
- *
- *    // int_out is now [0, 1, 2]
- *    // float_out is now [0.0f, 10.0f, 20.0f]
- *
- *    return 0;
- *  }
- *  \endcode
- *
- *  \see make_zip_iterator
- *  \see make_tuple
- *  \see tuple
- *  \see get
- */
-template <typename IteratorTuple>
-  class zip_iterator
-    : public detail::zip_iterator_base<IteratorTuple>::type
-{
-  public:
-    /*! Null constructor does nothing.
-     */
-    inline __host__ __device__
-    zip_iterator(void);
-
-    /*! This constructor creates a new \p zip_iterator from a
-     *  \p tuple of iterators.
-     *  
-     *  \param iterator_tuple The \p tuple of iterators to copy from.
-     */
-    inline __host__ __device__
-    zip_iterator(IteratorTuple iterator_tuple);
-
-    /*! This copy constructor creates a new \p zip_iterator from another
-     *  \p zip_iterator.
-     *
-     *  \param other The \p zip_iterator to copy.
-     */
-    template<typename OtherIteratorTuple>
-    inline __host__ __device__
-    zip_iterator(const zip_iterator<OtherIteratorTuple> &other,
-                 typename thrust::detail::enable_if_convertible<
-                   OtherIteratorTuple,
-                   IteratorTuple
-                 >::type * = 0);
-
-    /*! This method returns a \c const reference to this \p zip_iterator's
-     *  \p tuple of iterators.
-     *
-     *  \return A \c const reference to this \p zip_iterator's \p tuple
-     *          of iterators.
-     */
-    inline __host__ __device__
-    const IteratorTuple &get_iterator_tuple() const;
-
-    /*! \cond
-     */
-  private:
-    typedef typename
-    detail::zip_iterator_base<IteratorTuple>::type super_t;
-
-    friend class thrust::iterator_core_access;
-
-    // Dereferencing returns a tuple built from the dereferenced
-    // iterators in the iterator tuple.
-    __host__ __device__
-    typename super_t::reference dereference() const;
-
-    // Two zip_iterators are equal if the two first iterators of the
-    // tuple are equal. Note this differs from Boost's implementation, which
-    // considers the entire tuple.
-    template<typename OtherIteratorTuple>
-    inline __host__ __device__
-    bool equal(const zip_iterator<OtherIteratorTuple> &other) const;
-
-    // Advancing a zip_iterator means to advance all iterators in the tuple
-    inline __host__ __device__
-    void advance(typename super_t::difference_type n);
-
-    // Incrementing a zip iterator means to increment all iterators in the tuple
-    inline __host__ __device__
-    void increment();
-
-    // Decrementing a zip iterator means to decrement all iterators in the tuple
-    inline __host__ __device__
-    void decrement();
-
-    // Distance is calculated using the first iterator in the tuple.
-    template<typename OtherIteratorTuple>
-    inline __host__ __device__
-      typename super_t::difference_type
-        distance_to(const zip_iterator<OtherIteratorTuple> &other) const;
-
-    // The iterator tuple.
-    IteratorTuple m_iterator_tuple;
-
-    /*! \endcond
-     */
-}; // end zip_iterator
-
-/*! \p make_zip_iterator creates a \p zip_iterator from a \p tuple
- *  of iterators.
- *
- *  \param t The \p tuple of iterators to copy.
- *  \return A newly created \p zip_iterator which zips the iterators encapsulated in \p t.
- *
- *  \see zip_iterator
- */
-template<typename IteratorTuple>
-inline __host__ __device__
-zip_iterator<IteratorTuple> make_zip_iterator(IteratorTuple t);
-
-/*! \} // end fancyiterators
- */
-
-/*! \} // end iterators
- */
-
-} // end thrust
-
-#include <thrust/iterator/detail/zip_iterator.inl>
-
diff --git a/compat/thrust/logical.h b/compat/thrust/logical.h
deleted file mode 100644
index 21510f3f21..0000000000
--- a/compat/thrust/logical.h
+++ /dev/null
@@ -1,276 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file logical.h
- *  \brief Logical operations on ranges
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/execution_policy.h>
-
-namespace thrust
-{
-
-
-/*! \addtogroup reductions
- *  \{
- *  \addtogroup logical
- *  \ingroup reductions
- *  \{
- */
-
-
-/*! \p all_of determines whether all elements in a range satisfy a predicate.
- *  Specifically, \p all_of returns \c true if <tt>pred(*i)</tt> is \c true
- *  for every iterator \c i in the range <tt>[first, last)</tt> and 
- *  \c false otherwise.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the sequence.
- *  \param last  The end of the sequence.
- *  \param pred A predicate used to test range elements.
- *  \return \c true, if all elements satisfy the predicate; \c false, otherwise.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *  \tparam Predicate must be a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *
- *  \code
- *  #include <thrust/logical.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  bool A[3] = {true, true, false};
- *
- *  thrust::all_of(thrust::host, A, A + 2, thrust::identity<bool>()); // returns true
- *  thrust::all_of(thrust::host, A, A + 3, thrust::identity<bool>()); // returns false
- *
- *  // empty range
- *  thrust::all_of(thrust::host, A, A, thrust::identity<bool>()); // returns true
- *  
- *  \endcode
- *
- *  \see any_of
- *  \see none_of
- *  \see transform_reduce
- */
-template <typename DerivedPolicy, typename InputIterator, typename Predicate>
-bool all_of(const thrust::detail::execution_policy_base<DerivedPolicy> &exec, InputIterator first, InputIterator last, Predicate pred);
-
-
-/*! \p all_of determines whether all elements in a range satisfy a predicate.
- * Specifically, \p all_of returns \c true if <tt>pred(*i)</tt> is \c true
- * for every iterator \c i in the range <tt>[first, last)</tt> and 
- * \c false otherwise.
- *
- *  \param first The beginning of the sequence.
- *  \param last  The end of the sequence.
- *  \param pred A predicate used to test range elements.
- *  \return \c true, if all elements satisfy the predicate; \c false, otherwise.
- *
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *  \tparam Predicate must be a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *
- *  \code
- *  #include <thrust/logical.h>
- *  #include <thrust/functional.h>
- *  ...
- *  bool A[3] = {true, true, false};
- *
- *  thrust::all_of(A, A + 2, thrust::identity<bool>()); // returns true
- *  thrust::all_of(A, A + 3, thrust::identity<bool>()); // returns false
- *
- *  // empty range
- *  thrust::all_of(A, A, thrust::identity<bool>()); // returns true
- *  
- *  \endcode
- *
- *  \see any_of
- *  \see none_of
- *  \see transform_reduce
- */
-template <typename InputIterator, typename Predicate>
-bool all_of(InputIterator first, InputIterator last, Predicate pred);
-
-
-/*! \p any_of determines whether any element in a range satisfies a predicate.
- *  Specifically, \p any_of returns \c true if <tt>pred(*i)</tt> is \c true
- *  for any iterator \c i in the range <tt>[first, last)</tt> and 
- *  \c false otherwise.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the sequence.
- *  \param last  The end of the sequence.
- *  \param pred A predicate used to test range elements.
- *  \return \c true, if any element satisfies the predicate; \c false, otherwise.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *  \tparam Predicate must be a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *
- *  \code
- *  #include <thrust/logical.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  bool A[3] = {true, true, false};
- *
- *  thrust::any_of(thrust::host, A, A + 2, thrust::identity<bool>()); // returns true
- *  thrust::any_of(thrust::host, A, A + 3, thrust::identity<bool>()); // returns true
- *
- *  thrust::any_of(thrust::host, A + 2, A + 3, thrust::identity<bool>()); // returns false
- *
- *  // empty range
- *  thrust::any_of(thrust::host, A, A, thrust::identity<bool>()); // returns false
- *  \endcode
- *
- *  \see all_of
- *  \see none_of
- *  \see transform_reduce
- */
-template <typename DerivedPolicy, typename InputIterator, typename Predicate>
-bool any_of(const thrust::detail::execution_policy_base<DerivedPolicy> &exec, InputIterator first, InputIterator last, Predicate pred);
-   
-
-/*! \p any_of determines whether any element in a range satisfies a predicate.
- * Specifically, \p any_of returns \c true if <tt>pred(*i)</tt> is \c true
- * for any iterator \c i in the range <tt>[first, last)</tt> and 
- * \c false otherwise.
- *
- *  \param first The beginning of the sequence.
- *  \param last  The end of the sequence.
- *  \param pred A predicate used to test range elements.
- *  \return \c true, if any element satisfies the predicate; \c false, otherwise.
- *
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *  \tparam Predicate must be a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *
- *  \code
- *  #include <thrust/logical.h>
- *  #include <thrust/functional.h>
- *  ...
- *  bool A[3] = {true, true, false};
- *
- *  thrust::any_of(A, A + 2, thrust::identity<bool>()); // returns true
- *  thrust::any_of(A, A + 3, thrust::identity<bool>()); // returns true
- *
- *  thrust::any_of(A + 2, A + 3, thrust::identity<bool>()); // returns false
- *
- *  // empty range
- *  thrust::any_of(A, A, thrust::identity<bool>()); // returns false
- *  \endcode
- *
- *  \see all_of
- *  \see none_of
- *  \see transform_reduce
- */
-template <typename InputIterator, typename Predicate>
-bool any_of(InputIterator first, InputIterator last, Predicate pred);
-
-
-/*! \p none_of determines whether no element in a range satisfies a predicate.
- *  Specifically, \p none_of returns \c true if there is no iterator \c i in 
- *  the range <tt>[first, last)</tt> such that <tt>pred(*i)</tt> is \c true,
- *  and \c false otherwise.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the sequence.
- *  \param last  The end of the sequence.
- *  \param pred A predicate used to test range elements.
- *  \return \c true, if no element satisfies the predicate; \c false, otherwise.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *  \tparam Predicate must be a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *
- *  \code
- *  #include <thrust/logical.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  bool A[3] = {true, true, false};
- *
- *  thrust::none_of(thrust::host, A, A + 2, thrust::identity<bool>()); // returns false
- *  thrust::none_of(thrust::host, A, A + 3, thrust::identity<bool>()); // returns false
- *
- *  thrust::none_of(thrust::host, A + 2, A + 3, thrust::identity<bool>()); // returns true
- *
- *  // empty range
- *  thrust::none_of(thrust::host, A, A, thrust::identity<bool>()); // returns true
- *  \endcode
- *
- *  \see all_of
- *  \see any_of
- *  \see transform_reduce
- */
-template <typename DerivedPolicy, typename InputIterator, typename Predicate>
-bool none_of(const thrust::detail::execution_policy_base<DerivedPolicy> &exec, InputIterator first, InputIterator last, Predicate pred);
-
-
-/*! \p none_of determines whether no element in a range satisfies a predicate.
- *  Specifically, \p none_of returns \c true if there is no iterator \c i in 
- *  the range <tt>[first, last)</tt> such that <tt>pred(*i)</tt> is \c true,
- *  and \c false otherwise.
- *
- *  \param first The beginning of the sequence.
- *  \param last  The end of the sequence.
- *  \param pred A predicate used to test range elements.
- *  \return \c true, if no element satisfies the predicate; \c false, otherwise.
- *
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *  \tparam Predicate must be a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *
- *  \code
- *  #include <thrust/logical.h>
- *  #include <thrust/functional.h>
- *  ...
- *  bool A[3] = {true, true, false};
- *
- *  thrust::none_of(A, A + 2, thrust::identity<bool>()); // returns false
- *  thrust::none_of(A, A + 3, thrust::identity<bool>()); // returns false
- *
- *  thrust::none_of(A + 2, A + 3, thrust::identity<bool>()); // returns true
- *
- *  // empty range
- *  thrust::none_of(A, A, thrust::identity<bool>()); // returns true
- *  \endcode
- *
- *  \see all_of
- *  \see any_of
- *  \see transform_reduce
- */
-template <typename InputIterator, typename Predicate>
-bool none_of(InputIterator first, InputIterator last, Predicate pred);
-
-
-/*! \} // end logical
- *  \} // end reductions
- */
-
-
-} // end namespace thrust
-
-#include <thrust/detail/logical.inl>
-
diff --git a/compat/thrust/memory.h b/compat/thrust/memory.h
deleted file mode 100644
index 6362de4064..0000000000
--- a/compat/thrust/memory.h
+++ /dev/null
@@ -1,538 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-/*! \file thrust/memory.h
- *  \brief Abstractions for Thrust's memory model.
- */
-
-#include <thrust/detail/config.h>
-
-#include <thrust/detail/type_traits/pointer_traits.h>
-#include <thrust/detail/pointer.h>
-#include <thrust/detail/reference.h>
-#include <thrust/detail/raw_pointer_cast.h>
-#include <thrust/detail/raw_reference_cast.h>
-#include <thrust/detail/malloc_and_free.h>
-#include <thrust/detail/temporary_buffer.h>
-
-namespace thrust
-{
-
-
-/*! \addtogroup memory_management Memory Management
- *  \addtogroup memory_management_classes Memory Management Classes
- *  \ingroup memory_management
- *  \{
- */
-
-/*! \p pointer stores a pointer to an object allocated in memory. Like \p device_ptr, this
- *  type ensures type safety when dispatching standard algorithms on ranges resident in memory.
- *
- *  \p pointer generalizes \p device_ptr by relaxing the backend system associated with the \p pointer.
- *  Instead of the backend system specified by \p THRUST_DEFAULT_DEVICE_BACKEND, \p pointer's
- *  system is given by its second template parameter, \p Tag. For the purpose of Thrust dispatch,
- *  <tt>device_ptr<Element></tt> and <tt>pointer<Element,device_system_tag></tt> are considered equivalent.
- *
- *  The raw pointer encapsulated by a \p pointer may be obtained through its <tt>get</tt> member function
- *  or the \p raw_pointer_cast free function.
- *
- *  \tparam Element specifies the type of the pointed-to object.
- *
- *  \tparam Tag specifies the system with which this \p pointer is associated. This may be any Thrust
- *          backend system, or a user-defined tag.
- *
- *  \tparam Reference allows the client to specify the reference type returned upon dereference.
- *          By default, this type is <tt>reference<Element,pointer></tt>.
- *
- *  \tparam Derived allows the client to specify the name of the derived type when \p pointer is used as
- *          a base class. This is useful to ensure that arithmetic on values of the derived type return
- *          values of the derived type as a result. By default, this type is <tt>pointer<Element,Tag,Reference></tt>.
- *
- *  \note \p pointer is not a smart pointer; it is the client's responsibility to deallocate memory
- *        pointed to by \p pointer.
- *
- *  \see device_ptr
- *  \see reference
- *  \see raw_pointer_cast
- */
-// define pointer for the purpose of Doxygenating it
-// it is actually defined elsewhere
-#if 0
-template<typename Element, typename Tag, typename Reference = thrust::use_default, typename Derived = thrust::use_default>
-  class pointer
-{
-  public:
-    /*! The type of the raw pointer
-     */
-    typedef typename super_t::base_type raw_pointer;
-    
-    /*! \p pointer's default constructor initializes its encapsulated pointer to \c 0
-     */
-    __host__ __device__
-    pointer();
-
-    /*! This constructor allows construction of a <tt>pointer<const T, ...></tt> from a <tt>T*</tt>.
-     *
-     *  \param ptr A raw pointer to copy from, presumed to point to a location in \p Tag's memory.
-     *  \tparam OtherElement \p OtherElement shall be convertible to \p Element.
-     */
-    template<typename OtherElement>
-    __host__ __device__
-    explicit pointer(OtherElement *ptr);
-
-    /*! This constructor allows initialization from another pointer-like object.
-     *
-     *  \param other The \p OtherPointer to copy.
-     *
-     *  \tparam OtherPointer The tag associated with \p OtherPointer shall be convertible to \p Tag,
-     *                       and its element type shall be convertible to \p Element.
-     */
-    template<typename OtherPointer>
-    __host__ __device__
-    pointer(const OtherPointer &other,
-            typename thrust::detail::enable_if_pointer_is_convertible<
-              OtherPointer,
-              pointer<Element,Tag,Reference,Derived>
-            >::type * = 0);
-
-    /*! Assignment operator allows assigning from another pointer-like object with related type.
-     *
-     *  \param other The other pointer-like object to assign from.
-     *  \return <tt>*this</tt>
-     *
-     *  \tparam OtherPointer The tag associated with \p OtherPointer shall be convertible to \p Tag,
-     *                       and its element type shall be convertible to \p Element.
-     */
-    template<typename OtherPointer>
-    __host__ __device__
-    typename thrust::detail::enable_if_pointer_is_convertible<
-      OtherPointer,
-      pointer,
-      derived_type &
-    >::type
-    operator=(const OtherPointer &other);
-
-    /*! \p get returns this \p pointer's encapsulated raw pointer.
-     *  \return This \p pointer's raw pointer.
-     */
-    __host__ __device__
-    Element *get() const;
-};
-#endif
-
-/*! \p reference is a wrapped reference to an object stored in memory. \p reference generalizes
- *  \p device_reference by relaxing the type of pointer associated with the object. \p reference
- *  is the type of the result of dereferencing a tagged pointer-like object such as \p pointer, and
- *  intermediates operations on objects existing in a remote memory.
- *
- *  \tparam Element specifies the type of the referent object.
- *  \tparam Pointer specifies the type of the result of taking the address of \p reference.
- *  \tparam Derived allows the client to specify the name of the derived type when \p reference is used as
- *          a base class. This is useful to ensure that assignment to objects of the derived type return
- *          values of the derived type as a result. By default, this type is <tt>reference<Element,Pointer></tt>.
- */
-// define reference for the purpose of Doxygenating it
-// it is actually defined elsewhere
-#if 0
-template<typename Element, typename Pointer, typename Derived = thrust::use_default>
-  class reference
-{
-  public:
-    /*! The type of this \p reference's wrapped pointers.
-     */
-    typedef Pointer                                              pointer;
-
-    /*! The \p value_type of this \p reference.
-     */
-    typedef typename thrust::detail::remove_const<Element>::type value_type;
-
-    /*! This copy constructor initializes this \p reference
-     *  to refer to an object pointed to by the given \p pointer. After
-     *  this \p reference is constructed, it shall refer to the
-     *  object pointed to by \p ptr.
-     *
-     *  \param ptr A \p pointer to copy from.
-     */
-    __host__ __device__
-    explicit reference(const pointer &ptr);
-
-    /*! This copy constructor accepts a const reference to another
-     *  \p reference of related type. After this \p reference is constructed,
-     *  it shall refer to the same object as \p other.
-     *  
-     *  \param other A \p reference to copy from.
-     *  \tparam OtherElement the element type of the other \p reference.
-     *  \tparam OtherPointer the pointer type of the other \p reference.
-     *  \tparam OtherDerived the derived type of the other \p reference.
-     *
-     *  \note This constructor is templated primarily to allow initialization of 
-     *  <tt>reference<const T,...></tt> from <tt>reference<T,...></tt>.
-     */
-    template<typename OtherElement, typename OtherPointer, typename OtherDerived>
-    __host__ __device__
-    reference(const reference<OtherElement,OtherPointer,OtherDerived> &other,
-              typename thrust::detail::enable_if_convertible<
-                typename reference<OtherElement,OtherPointer,OtherDerived>::pointer,
-                pointer
-              >::type * = 0);
-
-    /*! Copy assignment operator copy assigns from another \p reference.
-     *
-     *  \param other The other \p reference to assign from.
-     *  \return <tt>static_cast<derived_type&>(*this)</tt>
-     */
-    __host__ __device__
-    derived_type &operator=(const reference &other);
-
-    /*! Assignment operator copy assigns from another \p reference of related type.
-     *
-     *  \param other The other \p reference to assign from.
-     *  \return <tt>static_cast<derived_type&>(*this)</tt>
-     *
-     *  \tparam OtherElement the element type of the other \p reference.
-     *  \tparam OtherPointer the pointer type of the other \p reference.
-     *  \tparam OtherDerived the derived type of the other \p reference.
-     */
-    template<typename OtherElement, typename OtherPointer, typename OtherDerived>
-    __host__ __device__
-    derived_type &operator=(const reference<OtherElement,OtherPointer,OtherDerived> &other);
-
-    /*! Assignment operator assigns from a \p value_type.
-     *
-     *  \param x The \p value_type to assign from.
-     *  \return <tt>static_cast<derived_type&>(*this)</tt>.
-     */
-    __host__ __device__
-    derived_type &operator=(const value_type &x);
-
-    /*! Address-of operator returns a \p pointer pointing to the object
-     *  referenced by this \p reference. It does not return the address of this
-     *  \p reference.
-     *
-     *  \return A \p pointer pointing to the referent object.
-     */
-    __host__ __device__
-    pointer operator&() const;
-
-    /*! Conversion operator converts this \p reference to \p value_type by
-     *  returning a copy of the referent object.
-     *  
-     *  \return A copy of the referent object.
-     */
-    __host__ __device__
-    operator value_type () const;
-
-    /*! Swaps the value of the referent object with another.
-     *
-     *  \param other The other \p reference with which to swap.
-     *  \note The argument is of type \p derived_type rather than \p reference.
-     */
-    __host__ __device__
-    void swap(derived_type &other);
-
-    /*! Prefix increment operator increments the referent object.
-     *
-     *  \return <tt>static_cast<derived_type&>(*this)</tt>.
-     *
-     *  \note Documentation for other arithmetic operators omitted for brevity.
-     */
-    derived_type &operator++();
-};
-#endif
-
-/*! \}
- */
-
-/*!
- *  \addtogroup memory_management_functions Memory Management Functions
- *  \ingroup memory_management
- *  \{
- */
-
-
-/*! \addtogroup allocation_functions
- *  \{
- */
-
-
-/*! This version of \p malloc allocates untyped uninitialized storage associated with a given system.
- *
- *  \param system The Thrust system with which to associate the storage.
- *  \param n The number of bytes of storage to allocate.
- *  \return If allocation succeeds, a pointer to the allocated storage; a null pointer otherwise.
- *          The pointer must be deallocated with \p thrust::free.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *
- *  \pre \p DerivedPolicy must be publicly derived from <code>thrust::execution_policy<DerivedPolicy></code>.
- *
- *  The following code snippet demonstrates how to use \p malloc to allocate a range of memory
- *  associated with Thrust's device system.
- *
- *  \code
- *  #include <thrust/memory.h>
- *  ...
- *  // allocate some memory with thrust::malloc
- *  const int N = 100;
- *  thrust::device_system_tag device_sys;
- *  thrust::pointer<void,thrust::device_system_tag> void_ptr = thrust::malloc(device_sys, N);
- *
- *  // manipulate memory
- *  ...
- *
- *  // deallocate void_ptr with thrust::free
- *  thrust::free(device_sys, void_ptr);
- *  \endcode
- *
- *  \see free
- *  \see device_malloc
- */
-template<typename DerivedPolicy>
-pointer<void,DerivedPolicy> malloc(const thrust::detail::execution_policy_base<DerivedPolicy> &system, std::size_t n);
-
-
-/*! This version of \p malloc allocates typed uninitialized storage associated with a given system.
- *
- *  \param system The Thrust system with which to associate the storage.
- *  \param n The number of elements of type \c T which the storage should accommodate.
- *  \return If allocation succeeds, a pointer to an allocation large enough to accommodate \c n
- *          elements of type \c T; a null pointer otherwise.
- *          The pointer must be deallocated with \p thrust::free.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *
- *  \pre \p DerivedPolicy must be publicly derived from <code>thrust::execution_policy<DerivedPolicy></code>.
- *
- *  The following code snippet demonstrates how to use \p malloc to allocate a range of memory
- *  to accommodate integers associated with Thrust's device system.
- *
- *  \code
- *  #include <thrust/memory.h>
- *  ...
- *  // allocate storage for 100 ints with thrust::malloc
- *  const int N = 100;
- *  thrust::device_system_tag device_sys;
- *  thrust::pointer<int,thrust::device_system_tag> ptr = thrust::malloc<int>(device_sys, N);
- *
- *  // manipulate memory
- *  ...
- *
- *  // deallocate ptr with thrust::free
- *  thrust::free(device_sys, ptr);
- *  \endcode
- *
- *  \see free
- *  \see device_malloc
- */
-template<typename T, typename DerivedPolicy>
-pointer<T,DerivedPolicy> malloc(const thrust::detail::execution_policy_base<DerivedPolicy> &system, std::size_t n);
-
-
-/*! \p get_temporary_buffer returns a pointer to storage associated with a given Thrust system sufficient to store up to
- *  \p n objects of type \c T. If not enough storage is available to accommodate \p n objects, an implementation may return
- *  a smaller buffer. The number of objects the returned buffer can accommodate is also returned.
- *
- *  Thrust uses \p get_temporary_buffer internally when allocating temporary storage required by algorithm implementations.
- *
- *  The storage allocated with \p get_temporary_buffer must be returned to the system with \p return_temporary_buffer.
- *
- *  \param system The Thrust system with which to associate the storage.
- *  \param n The requested number of objects of type \c T the storage should accommodate.
- *  \return A pair \c p such that <tt>p.first</tt> is a pointer to the allocated storage and <tt>p.second</tt> is the number of
- *          contiguous objects of type \c T that the storage can accommodate. If no storage can be allocated, <tt>p.first</tt> is
- *          a null pointer. The storage must be returned to the system using \p return_temporary_buffer.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *
- *  \pre \p DerivedPolicy must be publicly derived from <code>thrust::execution_policy<DerivedPolicy></code>.
- *
- *  The following code snippet demonstrates how to use \p get_temporary_buffer to allocate a range of memory
- *  to accommodate integers associated with Thrust's device system.
- *
- *  \code
- *  #include <thrust/memory.h>
- *  ...
- *  // allocate storage for 100 ints with thrust::get_temporary_buffer
- *  const int N = 100;
- *
- *  typedef thrust::pair<
- *    thrust::pointer<int,thrust::device_system_tag>,
- *    std::ptrdiff_t
- *  > ptr_and_size_t;
- *
- *  thrust::device_system_tag device_sys;
- *  ptr_and_size_t ptr_and_size = thrust::get_temporary_buffer<int>(device_sys, N);
- *
- *  // manipulate up to 100 ints
- *  for(int i = 0; i < ptr_and_size.second; ++i)
- *  {
- *    ptr_and_size.first[i] = i;
- *  }
- *
- *  // deallocate storage with thrust::return_temporary_buffer
- *  thrust::return_temporary_buffer(device_sys, ptr_and_size.first);
- *  \endcode
- *
- *  \see malloc
- *  \see return_temporary_buffer
- */
-template<typename T, typename DerivedPolicy>
-thrust::pair<thrust::pointer<T,DerivedPolicy>, typename thrust::pointer<T,DerivedPolicy>::difference_type>
-get_temporary_buffer(const thrust::detail::execution_policy_base<DerivedPolicy> &system, typename thrust::pointer<T,DerivedPolicy>::difference_type n);
-
-
-/*! \} allocation_functions
- */
-
-
-/*! \addtogroup deallocation_functions
- *  \{
- */
-
-
-/*! \p free deallocates the storage previously allocated by \p thrust::malloc.
- *
- *  \param system The Thrust system with which the storage is associated.
- *  \param ptr A pointer previously returned by \p thrust::malloc. If \p ptr is null, \p free
- *         does nothing.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *
- *  \pre \p ptr shall have been returned by a previous call to <tt>thrust::malloc(system, n)</tt> or <tt>thrust::malloc<T>(system, n)</tt> for some type \c T.
- *
- *  The following code snippet demonstrates how to use \p free to deallocate a range of memory
- *  previously allocated with \p thrust::malloc.
- *
- *  \code
- *  #include <thrust/memory.h>
- *  ...
- *  // allocate storage for 100 ints with thrust::malloc
- *  const int N = 100;
- *  thrust::device_system_tag device_sys;
- *  thrust::pointer<int,thrust::device_system_tag> ptr = thrust::malloc<int>(device_sys, N);
- *
- *  // manipulate memory
- *  ...
- *
- *  // deallocate ptr with thrust::free
- *  thrust::free(device_sys, ptr);
- *  \endcode
- */
-template<typename DerivedPolicy, typename Pointer>
-void free(const thrust::detail::execution_policy_base<DerivedPolicy> &system, Pointer ptr);
-
-
-/*! \p return_temporary_buffer deallocates storage associated with a given Thrust system previously allocated by \p get_temporary_buffer.
- *
- *  Thrust uses \p return_temporary_buffer internally when deallocating temporary storage required by algorithm implementations.
- *
- *  \param system The Thrust system with which the storage is associated.
- *  \param p A pointer previously returned by \p thrust::get_temporary_buffer. If \p p is null, \p return_temporary_buffer does nothing.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *
- *  \pre \p p shall have been previously allocated by \p thrust::get_temporary_buffer.
- *
- *  The following code snippet demonstrates how to use \p return_temporary_buffer to deallocate a range of memory
- *  previously allocated by \p get_temporary_buffer.
- *
- *  \code
- *  #include <thrust/memory.h>
- *  ...
- *  // allocate storage for 100 ints with thrust::get_temporary_buffer
- *  const int N = 100;
- *
- *  typedef thrust::pair<
- *    thrust::pointer<int,thrust::device_system_tag>,
- *    std::ptrdiff_t
- *  > ptr_and_size_t;
- *
- *  thrust::device_system_tag device_sys;
- *  ptr_and_size_t ptr_and_size = thrust::get_temporary_buffer<int>(device_sys, N);
- *
- *  // manipulate up to 100 ints
- *  for(int i = 0; i < ptr_and_size.second; ++i)
- *  {
- *    ptr_and_size.first[i] = i;
- *  }
- *
- *  // deallocate storage with thrust::return_temporary_buffer
- *  thrust::return_temporary_buffer(device_sys, ptr_and_size.first);
- *  \endcode
- *
- *  \see free
- *  \see get_temporary_buffer
- */
-template<typename DerivedPolicy, typename Pointer>
-void return_temporary_buffer(const thrust::detail::execution_policy_base<DerivedPolicy> &system, Pointer p);
-
-
-/*! \} deallocation_functions
- */
-
-
-/*! \p raw_pointer_cast creates a "raw" pointer from a pointer-like type,
- *  simply returning the wrapped pointer, should it exist.
- *
- *  \param ptr The pointer of interest.
- *  \return <tt>ptr.get()</tt>, if the expression is well formed; <tt>ptr</tt>, otherwise.
- *  \see raw_reference_cast
- */
-template<typename Pointer>
-__host__ __device__
-inline typename thrust::detail::pointer_traits<Pointer>::raw_pointer
-  raw_pointer_cast(const Pointer &ptr);
-
-
-/*! \p raw_reference_cast creates a "raw" reference from a wrapped reference type,
- *  simply returning the underlying reference, should it exist.
- *
- *  If the argument is not a reference wrapper, the result is a reference to the argument.
- *
- *  \param ref The reference of interest.
- *  \return <tt>*thrust::raw_pointer_cast(&ref)</tt>.
- *  \note There are two versions of \p raw_reference_cast. One for <tt>const</tt> references,
- *        and one for non-<tt>const</tt>.
- *  \see raw_pointer_cast
- */
-template<typename T>
-__host__ __device__
-inline typename detail::raw_reference<T>::type
-  raw_reference_cast(T &ref);
-
-
-/*! \p raw_reference_cast creates a "raw" reference from a wrapped reference type,
- *  simply returning the underlying reference, should it exist.
- *
- *  If the argument is not a reference wrapper, the result is a reference to the argument.
- *
- *  \param ref The reference of interest.
- *  \return <tt>*thrust::raw_pointer_cast(&ref)</tt>.
- *  \note There are two versions of \p raw_reference_cast. One for <tt>const</tt> references,
- *        and one for non-<tt>const</tt>.
- *  \see raw_pointer_cast
- */
-template<typename T>
-__host__ __device__
-inline typename detail::raw_reference<const T>::type
-  raw_reference_cast(const T &ref);
-
-
-/*! \}
- */
-
-} // end thrust
-
diff --git a/compat/thrust/merge.h b/compat/thrust/merge.h
deleted file mode 100644
index e5fa7b47ac..0000000000
--- a/compat/thrust/merge.h
+++ /dev/null
@@ -1,676 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-/*! \file merge.h
- *  \brief Merging sorted ranges
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/execution_policy.h>
-#include <thrust/pair.h>
-
-namespace thrust
-{
-
-
-/*! \addtogroup merging Merging
- *  \ingroup algorithms
- *  \{
- */
-
-
-/*! \p merge combines two sorted ranges <tt>[first1, last1)</tt> and <tt>[first2, last2)</tt>
- *  into a single sorted range. That is, it copies from <tt>[first1, last1)</tt> and
- *  <tt>[first2, last2)</tt> into <tt>[result, result + (last1 - first1) + (last2 - first2))</tt>
- *  such that the resulting range is in ascending order. \p merge is stable, meaning both that the
- *  relative order of elements within each input range is preserved, and that for equivalent elements
- *  in both input ranges the element from the first range precedes the element from the second. The
- *  return value is <tt>result + (last1 - first1) + (last2 - first2)</tt>.
- *
- *  This version of \p merge compares elements using \c operator<.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first1 The beginning of the first input range.
- *  \param last1 The end of the first input range.
- *  \param first2 The beginning of the second input range.
- *  \param last2 The end of the second input range.
- *  \param result The beginning of the merged output.
- *  \return The end of the output range.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator1 and \p InputIterator2 have the same \c value_type,
- *          \p InputIterator1's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator2 and \p InputIterator1 have the same \c value_type,
- *          \p InputIterator2's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *
- *  \pre The ranges <tt>[first1, last1)</tt> and <tt>[first2, last2)</tt> shall be sorted with respect to <tt>operator<</tt>.
- *  \pre The resulting range shall not overlap with either input range.
- *
- *  The following code snippet demonstrates how to use
- *  \p merge to compute the merger of two sorted sets of integers using the \p thrust::host execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/merge.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  int A1[6] = {1, 3, 5, 7, 9, 11};
- *  int A2[7] = {1, 1, 2, 3, 5,  8, 13};
- *
- *  int result[13];
- *
- *  int *result_end =
- *    thrust::merge(thrust::host,
- *                  A1, A1 + 6,
- *                  A2, A2 + 7,
- *                  result);
- *  // result = {1, 1, 1, 2, 3, 3, 5, 5, 7, 8, 9, 11, 13}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/merge.html
- *  \see \p set_union
- *  \see \p sort
- *  \see \p is_sorted
- */
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator>
-  OutputIterator merge(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                       InputIterator1 first1,
-                       InputIterator1 last1,
-                       InputIterator2 first2,
-                       InputIterator2 last2,
-                       OutputIterator result);
-
-
-/*! \p merge combines two sorted ranges <tt>[first1, last1)</tt> and <tt>[first2, last2)</tt>
- *  into a single sorted range. That is, it copies from <tt>[first1, last1)</tt> and
- *  <tt>[first2, last2)</tt> into <tt>[result, result + (last1 - first1) + (last2 - first2))</tt>
- *  such that the resulting range is in ascending order. \p merge is stable, meaning both that the
- *  relative order of elements within each input range is preserved, and that for equivalent elements
- *  in both input ranges the element from the first range precedes the element from the second. The
- *  return value is <tt>result + (last1 - first1) + (last2 - first2)</tt>.
- *
- *  This version of \p merge compares elements using \c operator<.
- *
- *  \param first1 The beginning of the first input range.
- *  \param last1 The end of the first input range.
- *  \param first2 The beginning of the second input range.
- *  \param last2 The end of the second input range.
- *  \param result The beginning of the merged output.
- *  \return The end of the output range.
- *
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator1 and \p InputIterator2 have the same \c value_type,
- *          \p InputIterator1's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator2 and \p InputIterator1 have the same \c value_type,
- *          \p InputIterator2's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *
- *  \pre The ranges <tt>[first1, last1)</tt> and <tt>[first2, last2)</tt> shall be sorted with respect to <tt>operator<</tt>.
- *  \pre The resulting range shall not overlap with either input range.
- *
- *  The following code snippet demonstrates how to use
- *  \p merge to compute the merger of two sorted sets of integers.
- *
- *  \code
- *  #include <thrust/merge.h>
- *  ...
- *  int A1[6] = {1, 3, 5, 7, 9, 11};
- *  int A2[7] = {1, 1, 2, 3, 5,  8, 13};
- *
- *  int result[13];
- *
- *  int *result_end = thrust::merge(A1, A1 + 6, A2, A2 + 7, result);
- *  // result = {1, 1, 1, 2, 3, 3, 5, 5, 7, 8, 9, 11, 13}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/merge.html
- *  \see \p set_union
- *  \see \p sort
- *  \see \p is_sorted
- */
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator>
-  OutputIterator merge(InputIterator1 first1,
-                       InputIterator1 last1,
-                       InputIterator2 first2,
-                       InputIterator2 last2,
-                       OutputIterator result);
-
-
-/*! \p merge combines two sorted ranges <tt>[first1, last1)</tt> and <tt>[first2, last2)</tt>
- *  into a single sorted range. That is, it copies from <tt>[first1, last1)</tt> and
- *  <tt>[first2, last2)</tt> into <tt>[result, result + (last1 - first1) + (last2 - first2))</tt>
- *  such that the resulting range is in ascending order. \p merge is stable, meaning both that the
- *  relative order of elements within each input range is preserved, and that for equivalent elements
- *  in both input ranges the element from the first range precedes the element from the second. The
- *  return value is <tt>result + (last1 - first1) + (last2 - first2)</tt>.
- *
- *  This version of \p merge compares elements using a function object \p comp.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first1 The beginning of the first input range.
- *  \param last1 The end of the first input range.
- *  \param first2 The beginning of the second input range.
- *  \param last2 The end of the second input range.
- *  \param result The beginning of the merged output.
- *  \param comp Comparison operator.
- *  \return The end of the output range.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator1's \c value_type is convertible to \p StrictWeakCompare's \c first_argument_type,
- *          and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator2's \c value_type is convertible to \p StrictWeakCompare's \c second_argument_type,
- *          and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam StrictWeakCompare is a model of <a href="http://www.sgi.com/tech/stl/StrictWeakOrdering.html">Strict Weak Ordering</a>.
- *
- *  \pre The ranges <tt>[first1, last1)</tt> and <tt>[first2, last2)</tt> shall be sorted with respect to \p comp.
- *  \pre The resulting range shall not overlap with either input range.
- *
- *  The following code snippet demonstrates how to use
- *  \p merge to compute the merger of two sets of integers sorted in
- *  descending order using the \p thrust::host execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/merge.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  int A1[6] = {11, 9, 7, 5, 3, 1};
- *  int A2[7] = {13, 8, 5, 3, 2, 1, 1};
- *
- *  int result[13];
- *
- *  int *result_end = thrust::merge(thrust::host,
- *                                  A1, A1 + 6,
- *                                  A2, A2 + 7,
- *                                  result,
- *                                  thrust::greater<int>());
- *  // result = {13, 11, 9, 8, 7, 5, 5, 3, 3, 2, 1, 1, 1}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/merge.html
- *  \see \p sort
- *  \see \p is_sorted
- */
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename StrictWeakCompare>
-  OutputIterator merge(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                       InputIterator1 first1,
-                       InputIterator1 last1,
-                       InputIterator2 first2,
-                       InputIterator2 last2,
-                       OutputIterator result,
-                       StrictWeakCompare comp);
-
-
-/*! \p merge combines two sorted ranges <tt>[first1, last1)</tt> and <tt>[first2, last2)</tt>
- *  into a single sorted range. That is, it copies from <tt>[first1, last1)</tt> and
- *  <tt>[first2, last2)</tt> into <tt>[result, result + (last1 - first1) + (last2 - first2))</tt>
- *  such that the resulting range is in ascending order. \p merge is stable, meaning both that the
- *  relative order of elements within each input range is preserved, and that for equivalent elements
- *  in both input ranges the element from the first range precedes the element from the second. The
- *  return value is <tt>result + (last1 - first1) + (last2 - first2)</tt>.
- *
- *  This version of \p merge compares elements using a function object \p comp.
- *
- *  \param first1 The beginning of the first input range.
- *  \param last1 The end of the first input range.
- *  \param first2 The beginning of the second input range.
- *  \param last2 The end of the second input range.
- *  \param result The beginning of the merged output.
- *  \param comp Comparison operator.
- *  \return The end of the output range.
- *
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator1's \c value_type is convertible to \p StrictWeakCompare's \c first_argument_type,
- *          and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator2's \c value_type is convertible to \p StrictWeakCompare's \c second_argument_type,
- *          and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam StrictWeakCompare is a model of <a href="http://www.sgi.com/tech/stl/StrictWeakOrdering.html">Strict Weak Ordering</a>.
- *
- *  \pre The ranges <tt>[first1, last1)</tt> and <tt>[first2, last2)</tt> shall be sorted with respect to \p comp.
- *  \pre The resulting range shall not overlap with either input range.
- *
- *  The following code snippet demonstrates how to use
- *  \p merge to compute the merger of two sets of integers sorted in
- *  descending order.
- *
- *  \code
- *  #include <thrust/merge.h>
- *  #include <thrust/functional.h>
- *  ...
- *  int A1[6] = {11, 9, 7, 5, 3, 1};
- *  int A2[7] = {13, 8, 5, 3, 2, 1, 1};
- *
- *  int result[13];
- *
- *  int *result_end = thrust::merge(A1, A1 + 6, A2, A2 + 7, result, thrust::greater<int>());
- *  // result = {13, 11, 9, 8, 7, 5, 5, 3, 3, 2, 1, 1, 1}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/merge.html
- *  \see \p sort
- *  \see \p is_sorted
- */
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename StrictWeakCompare>
-  OutputIterator merge(InputIterator1 first1,
-                       InputIterator1 last1,
-                       InputIterator2 first2,
-                       InputIterator2 last2,
-                       OutputIterator result,
-                       StrictWeakCompare comp);
-
-
-/*! \p merge_by_key performs a key-value merge. That is, \p merge_by_key copies elements from
- *  <tt>[keys_first1, keys_last1)</tt> and <tt>[keys_first2, keys_last2)</tt> into a single range,
- *  <tt>[keys_result, keys_result + (keys_last1 - keys_first1) + (keys_last2 - keys_first2))</tt> such that
- *  the resulting range is in ascending key order.
- *
- *  At the same time, \p merge_by_key copies elements from the two associated ranges <tt>[values_first1, values_first1 + (keys_last1 - keys_first1))</tt>
- *  and <tt>[values_first2, values_first2 + (keys_last2 - keys_first2))</tt> into a single range,
- *  <tt>[values_result, values_result + (keys_last1 - keys_first1) + (keys_last2 - keys_first2))</tt> such that
- *  the resulting range is in ascending order implied by each input element's associated key.
- *
- *  \p merge_by_key is stable, meaning both that the relative order of elements within each input range is
- *  preserved, and that for equivalent elements in all input key ranges the element from the first range
- *  precedes the element from the second.
- *
- *  The return value is <tt>(keys_result + (keys_last1 - keys_first1) + (keys_last2 - keys_first2))</tt>
- *  and <tt>(values_result + (keys_last1 - keys_first1) + (keys_last2 - keys_first2))</tt>.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param keys_first1 The beginning of the first input range of keys.
- *  \param keys_last1 The end of the first input range of keys.
- *  \param keys_first2 The beginning of the second input range of keys.
- *  \param keys_last2 The end of the second input range of keys.
- *  \param values_first1 The beginning of the first input range of values.
- *  \param values_first2 The beginning of the second input range of values.
- *  \param keys_result The beginning of the merged output range of keys.
- *  \param values_result The beginning of the merged output range of values.
- *  \return A \p pair \c p such that <tt>p.first</tt> is the end of the output range of keys,
- *          and such that <tt>p.second</tt> is the end of the output range of values.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator1 and \p InputIterator2 have the same \c value_type,
- *          \p InputIterator1's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator2 and \p InputIterator1 have the same \c value_type,
- *          \p InputIterator2's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types.
- *  \tparam InputIterator3 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types.
- *  \tparam InputIterator4 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator4's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types.
- *  \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *
- *  \pre The ranges <tt>[keys_first1, keys_last1)</tt> and <tt>[keys_first2, keys_last2)</tt> shall be sorted with respect to <tt>operator<</tt>.
- *  \pre The resulting ranges shall not overlap with any input range.
- *
- *  The following code snippet demonstrates how to use
- *  \p merge_by_key to compute the merger of two sets of integers sorted in
- *  ascending order using the \p thrust::host execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/merge.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  int A_keys[6] = {1, 3, 5, 7, 9, 11};
- *  int A_vals[6] = {0, 0, 0, 0, 0, 0};
- *
- *  int B_keys[7] = {1, 1, 2, 3, 5, 8, 13};
- *  int B_vals[7] = {1, 1, 1, 1, 1, 1, 1};
- *
- *  int keys_result[13];
- *  int vals_result[13];
- *
- *  thrust::pair<int*,int*> end =
- *    thrust::merge_by_key(thrust::host,
- *                         A_keys, A_keys + 6,
- *                         B_keys, B_keys + 7,
- *                         A_vals, B_vals,
- *                         keys_result, vals_result);
- *
- *  // keys_result = {1, 1, 1, 2, 3, 3, 5, 5, 7, 8, 9, 11, 13}
- *  // vals_result = {0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0,  0,  1}
- *  \endcode
- *
- *  \see merge
- *  \see \p sort_by_key
- *  \see \p is_sorted
- */
-template<typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename InputIterator3, typename InputIterator4, typename OutputIterator1, typename OutputIterator2>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    merge_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                 InputIterator1 keys_first1, InputIterator1 keys_last1,
-                 InputIterator2 keys_first2, InputIterator2 keys_last2,
-                 InputIterator3 values_first1, InputIterator4 values_first2,
-                 OutputIterator1 keys_result,
-                 OutputIterator2 values_result);
-
-
-/*! \p merge_by_key performs a key-value merge. That is, \p merge_by_key copies elements from
- *  <tt>[keys_first1, keys_last1)</tt> and <tt>[keys_first2, keys_last2)</tt> into a single range,
- *  <tt>[keys_result, keys_result + (keys_last1 - keys_first1) + (keys_last2 - keys_first2))</tt> such that
- *  the resulting range is in ascending key order.
- *
- *  At the same time, \p merge_by_key copies elements from the two associated ranges <tt>[values_first1, values_first1 + (keys_last1 - keys_first1))</tt>
- *  and <tt>[values_first2, values_first2 + (keys_last2 - keys_first2))</tt> into a single range,
- *  <tt>[values_result, values_result + (keys_last1 - keys_first1) + (keys_last2 - keys_first2))</tt> such that
- *  the resulting range is in ascending order implied by each input element's associated key.
- *
- *  \p merge_by_key is stable, meaning both that the relative order of elements within each input range is
- *  preserved, and that for equivalent elements in all input key ranges the element from the first range
- *  precedes the element from the second.
- *
- *  The return value is <tt>(keys_result + (keys_last1 - keys_first1) + (keys_last2 - keys_first2))</tt>
- *  and <tt>(values_result + (keys_last1 - keys_first1) + (keys_last2 - keys_first2))</tt>.
- *
- *  \param keys_first1 The beginning of the first input range of keys.
- *  \param keys_last1 The end of the first input range of keys.
- *  \param keys_first2 The beginning of the second input range of keys.
- *  \param keys_last2 The end of the second input range of keys.
- *  \param values_first1 The beginning of the first input range of values.
- *  \param values_first2 The beginning of the second input range of values.
- *  \param keys_result The beginning of the merged output range of keys.
- *  \param values_result The beginning of the merged output range of values.
- *  \return A \p pair \c p such that <tt>p.first</tt> is the end of the output range of keys,
- *          and such that <tt>p.second</tt> is the end of the output range of values.
- *
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator1 and \p InputIterator2 have the same \c value_type,
- *          \p InputIterator1's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator2 and \p InputIterator1 have the same \c value_type,
- *          \p InputIterator2's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types.
- *  \tparam InputIterator3 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types.
- *  \tparam InputIterator4 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator4's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types.
- *  \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *
- *  \pre The ranges <tt>[keys_first1, keys_last1)</tt> and <tt>[keys_first2, keys_last2)</tt> shall be sorted with respect to <tt>operator<</tt>.
- *  \pre The resulting ranges shall not overlap with any input range.
- *
- *  The following code snippet demonstrates how to use
- *  \p merge_by_key to compute the merger of two sets of integers sorted in
- *  ascending order.
- *
- *  \code
- *  #include <thrust/merge.h>
- *  #include <thrust/functional.h>
- *  ...
- *  int A_keys[6] = {1, 3, 5, 7, 9, 11};
- *  int A_vals[6] = {0, 0, 0, 0, 0, 0};
- *
- *  int B_keys[7] = {1, 1, 2, 3, 5, 8, 13};
- *  int B_vals[7] = {1, 1, 1, 1, 1, 1, 1};
- *
- *  int keys_result[13];
- *  int vals_result[13];
- *
- *  thrust::pair<int*,int*> end = thrust::merge_by_key(A_keys, A_keys + 6, B_keys, B_keys + 7, A_vals, B_vals, keys_result, vals_result);
- *
- *  // keys_result = {1, 1, 1, 2, 3, 3, 5, 5, 7, 8, 9, 11, 13}
- *  // vals_result = {0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0,  0,  1}
- *  \endcode
- *
- *  \see merge
- *  \see \p sort_by_key
- *  \see \p is_sorted
- */
-template<typename InputIterator1, typename InputIterator2, typename InputIterator3, typename InputIterator4, typename OutputIterator1, typename OutputIterator2>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    merge_by_key(InputIterator1 keys_first1, InputIterator1 keys_last1,
-                 InputIterator2 keys_first2, InputIterator2 keys_last2,
-                 InputIterator3 values_first1, InputIterator4 values_first2,
-                 OutputIterator1 keys_result,
-                 OutputIterator2 values_result);
-
-
-/*! \p merge_by_key performs a key-value merge. That is, \p merge_by_key copies elements from
- *  <tt>[keys_first1, keys_last1)</tt> and <tt>[keys_first2, keys_last2)</tt> into a single range,
- *  <tt>[keys_result, keys_result + (keys_last1 - keys_first1) + (keys_last2 - keys_first2))</tt> such that
- *  the resulting range is in ascending key order.
- *
- *  At the same time, \p merge_by_key copies elements from the two associated ranges <tt>[values_first1 + (keys_last1 - keys_first1))</tt>
- *  and <tt>[values_first2 + (keys_last2 - keys_first2))</tt> into a single range,
- *  <tt>[values_result, values_result + (keys_last1 - keys_first1) + (keys_last2 - keys_first2))</tt> such that
- *  the resulting range is in ascending order implied by each input element's associated key.
- *
- *  \p merge_by_key is stable, meaning both that the relative order of elements within each input range is
- *  preserved, and that for equivalent elements in all input key ranges the element from the first range
- *  precedes the element from the second.
- *
- *  The return value is is <tt>(keys_result + (keys_last1 - keys_first1) + (keys_last2 - keys_first2))</tt>
- *  and <tt>(values_result + (keys_last1 - keys_first1) + (keys_last2 - keys_first2))</tt>.
- *
- *  This version of \p merge_by_key compares key elements using a function object \p comp.
- *
- *  The algorithm's execution is parallelized using \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param keys_first1 The beginning of the first input range of keys.
- *  \param keys_last1 The end of the first input range of keys.
- *  \param keys_first2 The beginning of the second input range of keys.
- *  \param keys_last2 The end of the second input range of keys.
- *  \param values_first1 The beginning of the first input range of values.
- *  \param values_first2 The beginning of the first input range of values.
- *  \param keys_result The beginning of the merged output range of keys.
- *  \param values_result The beginning of the merged output range of values.
- *  \param comp Comparison operator.
- *  \return A \p pair \c p such that <tt>p.first</tt> is the end of the output range of keys,
- *          and such that <tt>p.second</tt> is the end of the output range of values.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator1's \c value_type is convertable to \p StrictWeakCompare's \c first_argument_type.
- *          and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator1's set of \c value_types.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator2's \c value_type is convertable to \p StrictWeakCompare's \c second_argument_type.
- *          and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator1's set of \c value_types.
- *  \tparam InputIterator3 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types.
- *  \tparam InputIterator4 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator4's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types.
- *  \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam StrictWeakCompare is a model of <a href="http://www.sgi.com/tech/stl/StrictWeakOrdering.html">Strict Weak Ordering</a>.
- *
- *  \pre The ranges <tt>[keys_first1, keys_last1)</tt> and <tt>[keys_first2, keys_last2)</tt> shall be sorted with respect to \p comp.
- *  \pre The resulting ranges shall not overlap with any input range.
- *
- *  The following code snippet demonstrates how to use
- *  \p merge_by_key to compute the merger of two sets of integers sorted in
- *  descending order using the \p thrust::host execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/merge.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  int A_keys[6] = {11, 9, 7, 5, 3, 1};
- *  int A_vals[6] = { 0, 0, 0, 0, 0, 0};
- *
- *  int B_keys[7] = {13, 8, 5, 3, 2, 1, 1};
- *  int B_vals[7] = { 1, 1, 1, 1, 1, 1, 1};
- *
- *  int keys_result[13];
- *  int vals_result[13];
- *
- *  thrust::pair<int*,int*> end =
- *    thrust::merge_by_key(thrust::host,
- *                         A_keys, A_keys + 6,
- *                         B_keys, B_keys + 7,
- *                         A_vals, B_vals,
- *                         keys_result, vals_result,
- *                         thrust::greater<int>());
- *
- *  // keys_result = {13, 11, 9, 8, 7, 5, 5, 3, 3, 2, 1, 1, 1}
- *  // vals_result = { 1,  0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1}
- *  \endcode
- *
- *  \see merge
- *  \see \p sort_by_key
- *  \see \p is_sorted
- */
-template<typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename InputIterator3, typename InputIterator4, typename OutputIterator1, typename OutputIterator2, typename Compare>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    merge_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                 InputIterator1 keys_first1, InputIterator1 keys_last1,
-                 InputIterator2 keys_first2, InputIterator2 keys_last2,
-                 InputIterator3 values_first1, InputIterator4 values_first2,
-                 OutputIterator1 keys_result,
-                 OutputIterator2 values_result,
-                 Compare comp);
-
-
-/*! \p merge_by_key performs a key-value merge. That is, \p merge_by_key copies elements from
- *  <tt>[keys_first1, keys_last1)</tt> and <tt>[keys_first2, keys_last2)</tt> into a single range,
- *  <tt>[keys_result, keys_result + (keys_last1 - keys_first1) + (keys_last2 - keys_first2))</tt> such that
- *  the resulting range is in ascending key order.
- *
- *  At the same time, \p merge_by_key copies elements from the two associated ranges <tt>[values_first1 + (keys_last1 - keys_first1))</tt>
- *  and <tt>[values_first2 + (keys_last2 - keys_first2))</tt> into a single range,
- *  <tt>[values_result, values_result + (keys_last1 - keys_first1) + (keys_last2 - keys_first2))</tt> such that
- *  the resulting range is in ascending order implied by each input element's associated key.
- *
- *  \p merge_by_key is stable, meaning both that the relative order of elements within each input range is
- *  preserved, and that for equivalent elements in all input key ranges the element from the first range
- *  precedes the element from the second.
- *
- *  The return value is is <tt>(keys_result + (keys_last1 - keys_first1) + (keys_last2 - keys_first2))</tt>
- *  and <tt>(values_result + (keys_last1 - keys_first1) + (keys_last2 - keys_first2))</tt>.
- *
- *  This version of \p merge_by_key compares key elements using a function object \p comp.
- *
- *  \param keys_first1 The beginning of the first input range of keys.
- *  \param keys_last1 The end of the first input range of keys.
- *  \param keys_first2 The beginning of the second input range of keys.
- *  \param keys_last2 The end of the second input range of keys.
- *  \param values_first1 The beginning of the first input range of values.
- *  \param values_first2 The beginning of the first input range of values.
- *  \param keys_result The beginning of the merged output range of keys.
- *  \param values_result The beginning of the merged output range of values.
- *  \param comp Comparison operator.
- *  \return A \p pair \c p such that <tt>p.first</tt> is the end of the output range of keys,
- *          and such that <tt>p.second</tt> is the end of the output range of values.
- *
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator1's \c value_type is convertable to \p StrictWeakCompare's \c first_argument_type.
- *          and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator1's set of \c value_types.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator2's \c value_type is convertable to \p StrictWeakCompare's \c second_argument_type.
- *          and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator1's set of \c value_types.
- *  \tparam InputIterator3 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types.
- *  \tparam InputIterator4 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator4's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types.
- *  \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam StrictWeakCompare is a model of <a href="http://www.sgi.com/tech/stl/StrictWeakOrdering.html">Strict Weak Ordering</a>.
- *
- *  \pre The ranges <tt>[keys_first1, keys_last1)</tt> and <tt>[keys_first2, keys_last2)</tt> shall be sorted with respect to \p comp.
- *  \pre The resulting ranges shall not overlap with any input range.
- *
- *  The following code snippet demonstrates how to use
- *  \p merge_by_key to compute the merger of two sets of integers sorted in
- *  descending order.
- *
- *  \code
- *  #include <thrust/merge.h>
- *  #include <thrust/functional.h>
- *  ...
- *  int A_keys[6] = {11, 9, 7, 5, 3, 1};
- *  int A_vals[6] = { 0, 0, 0, 0, 0, 0};
- *
- *  int B_keys[7] = {13, 8, 5, 3, 2, 1, 1};
- *  int B_vals[7] = { 1, 1, 1, 1, 1, 1, 1};
- *
- *  int keys_result[13];
- *  int vals_result[13];
- *
- *  thrust::pair<int*,int*> end = thrust::merge_by_key(A_keys, A_keys + 6, B_keys, B_keys + 7, A_vals, B_vals, keys_result, vals_result, thrust::greater<int>());
- *
- *  // keys_result = {13, 11, 9, 8, 7, 5, 5, 3, 3, 2, 1, 1, 1}
- *  // vals_result = { 1,  0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1}
- *  \endcode
- *
- *  \see merge
- *  \see \p sort_by_key
- *  \see \p is_sorted
- */
-template<typename InputIterator1, typename InputIterator2, typename InputIterator3, typename InputIterator4, typename OutputIterator1, typename OutputIterator2, typename StrictWeakCompare>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    merge_by_key(InputIterator1 keys_first1, InputIterator1 keys_last1,
-                 InputIterator2 keys_first2, InputIterator2 keys_last2,
-                 InputIterator3 values_first1, InputIterator4 values_first2,
-                 OutputIterator1 keys_result,
-                 OutputIterator2 values_result,
-                 StrictWeakCompare comp);
-
-
-/*! \} // merging
- */
-
-} // end thrust
-
-#include <thrust/detail/merge.inl>
-
diff --git a/compat/thrust/mismatch.h b/compat/thrust/mismatch.h
deleted file mode 100644
index 898157ac90..0000000000
--- a/compat/thrust/mismatch.h
+++ /dev/null
@@ -1,258 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file mismatch.h
- *  \brief Search for differences between ranges
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/execution_policy.h>
-#include <thrust/pair.h>
-
-namespace thrust
-{
-
-
-/*! \addtogroup algorithms
- */
-
-/*! \addtogroup searching
- *  \ingroup algorithms
- *  \{
- */
-
-
-/*! \p mismatch finds the first position where the two ranges <tt>[first1, last1)</tt>
- *  and <tt>[first2, first2 + (last1 - first1))</tt> differ. The two versions of 
- *  \p mismatch use different tests for whether elements differ.
- *
- *  This version of \p mismatch finds the first iterator \c i in <tt>[first1, last1)</tt>
- *  such that <tt>*i == *(first2 + (i - first1))</tt> is \c false. The return value is a
- *  \c pair whose first element is \c i and whose second element is <tt>*(first2 + (i - first1))</tt>.
- *  If no such iterator \c i exists, the return value is a \c pair whose first element
- *  is \c last1 and whose second element is <tt>*(first2 + (last1 - first1))</tt>.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first1 The beginning of the first sequence.
- *  \param last1  The end of the first sequence.
- *  \param first2 The beginning of the second sequence.
- *  \return The first position where the sequences differ.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *          and \p InputIterator1's \c value_type is equality comparable to \p InputIterator2's \c value_type.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>.
- *
- *  \code
- *  #include <thrust/mismatch.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  thrust::device_vector<int> vec1(4);
- *  thrust::device_vector<int> vec2(4);
- *
- *  vec1[0] = 0;  vec2[0] = 0; 
- *  vec1[1] = 5;  vec2[1] = 5;
- *  vec1[2] = 3;  vec2[2] = 8;
- *  vec1[3] = 7;  vec2[3] = 7;
- *
- *  typedef thrust::device_vector<int>::iterator Iterator;
- *  thrust::pair<Iterator,Iterator> result;
- *
- *  result = thrust::mismatch(thrust::device, vec1.begin(), vec1.end(), vec2.begin());
- *
- *  // result.first  is vec1.begin() + 2
- *  // result.second is vec2.begin() + 2
- *  \endcode
- *
- *  \see find
- *  \see find_if
- */
-template<typename DerivedPolicy, typename InputIterator1, typename InputIterator2>
-thrust::pair<InputIterator1, InputIterator2> mismatch(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                                      InputIterator1 first1,
-                                                      InputIterator1 last1,
-                                                      InputIterator2 first2);
-
-
-/*! \p mismatch finds the first position where the two ranges <tt>[first1, last1)</tt>
- * and <tt>[first2, first2 + (last1 - first1))</tt> differ. The two versions of 
- * \p mismatch use different tests for whether elements differ.
- *
- * This version of \p mismatch finds the first iterator \c i in <tt>[first1, last1)</tt>
- * such that <tt>*i == *(first2 + (i - first1))</tt> is \c false. The return value is a
- * \c pair whose first element is \c i and whose second element is <tt>*(first2 + (i - first1))</tt>.
- * If no such iterator \c i exists, the return value is a \c pair whose first element
- * is \c last1 and whose second element is <tt>*(first2 + (last1 - first1))</tt>.
- *
- *  \param first1 The beginning of the first sequence.
- *  \param last1  The end of the first sequence.
- *  \param first2 The beginning of the second sequence.
- *  \return The first position where the sequences differ.
- *
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *          and \p InputIterator1's \c value_type is equality comparable to \p InputIterator2's \c value_type.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>.
- *
- *  \code
- *  #include <thrust/mismatch.h>
- *  #include <thrust/device_vector.h>
- *  ...
- *  thrust::device_vector<int> vec1(4);
- *  thrust::device_vector<int> vec2(4);
- *
- *  vec1[0] = 0;  vec2[0] = 0; 
- *  vec1[1] = 5;  vec2[1] = 5;
- *  vec1[2] = 3;  vec2[2] = 8;
- *  vec1[3] = 7;  vec2[3] = 7;
- *
- *  typedef thrust::device_vector<int>::iterator Iterator;
- *  thrust::pair<Iterator,Iterator> result;
- *
- *  result = thrust::mismatch(vec1.begin(), vec1.end(), vec2.begin());
- *
- *  // result.first  is vec1.begin() + 2
- *  // result.second is vec2.begin() + 2
- *  \endcode
- *
- *  \see find
- *  \see find_if
- */
-template <typename InputIterator1, typename InputIterator2>
-thrust::pair<InputIterator1, InputIterator2> mismatch(InputIterator1 first1,
-                                                      InputIterator1 last1,
-                                                      InputIterator2 first2);
-
-
-/*! \p mismatch finds the first position where the two ranges <tt>[first1, last1)</tt>
- *  and <tt>[first2, first2 + (last1 - first1))</tt> differ. The two versions of 
- *  \p mismatch use different tests for whether elements differ.
- *
- *  This version of \p mismatch finds the first iterator \c i in <tt>[first1, last1)</tt>
- *  such that <tt>pred(\*i, \*(first2 + (i - first1))</tt> is \c false. The return value is a
- *  \c pair whose first element is \c i and whose second element is <tt>*(first2 + (i - first1))</tt>.
- *  If no such iterator \c i exists, the return value is a \c pair whose first element is
- *  \c last1 and whose second element is <tt>*(first2 + (last1 - first1))</tt>.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first1 The beginning of the first sequence.
- *  \param last1  The end of the first sequence.
- *  \param first2 The beginning of the second sequence.
- *  \param pred   The binary predicate to compare elements.
- *  \return The first position where the sequences differ.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/stl/BinaryPredicate.html">Input Iterator</a>.
- *
- *  \code
- *  #include <thrust/mismatch.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  thrust::device_vector<int> vec1(4);
- *  thrust::device_vector<int> vec2(4);
- *
- *  vec1[0] = 0;  vec2[0] = 0; 
- *  vec1[1] = 5;  vec2[1] = 5;
- *  vec1[2] = 3;  vec2[2] = 8;
- *  vec1[3] = 7;  vec2[3] = 7;
- *
- *  typedef thrust::device_vector<int>::iterator Iterator;
- *  thrust::pair<Iterator,Iterator> result;
- *
- *  result = thrust::mismatch(thrust::device, vec1.begin(), vec1.end(), vec2.begin(), thrust::equal_to<int>());
- *
- *  // result.first  is vec1.begin() + 2
- *  // result.second is vec2.begin() + 2
- *  \endcode
- *
- *  \see find
- *  \see find_if
- */
-template<typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename BinaryPredicate>
-thrust::pair<InputIterator1, InputIterator2> mismatch(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                                      InputIterator1 first1,
-                                                      InputIterator1 last1,
-                                                      InputIterator2 first2,
-                                                      BinaryPredicate pred);
-
-
-/*! \p mismatch finds the first position where the two ranges <tt>[first1, last1)</tt>
- * and <tt>[first2, first2 + (last1 - first1))</tt> differ. The two versions of 
- * \p mismatch use different tests for whether elements differ.
- *
- * This version of \p mismatch finds the first iterator \c i in <tt>[first1, last1)</tt>
- * such that <tt>pred(\*i, \*(first2 + (i - first1))</tt> is \c false. The return value is a
- * \c pair whose first element is \c i and whose second element is <tt>*(first2 + (i - first1))</tt>.
- * If no such iterator \c i exists, the return value is a \c pair whose first element is
- * \c last1 and whose second element is <tt>*(first2 + (last1 - first1))</tt>.
- *
- *  \param first1 The beginning of the first sequence.
- *  \param last1  The end of the first sequence.
- *  \param first2 The beginning of the second sequence.
- *  \param pred   The binary predicate to compare elements.
- *  \return The first position where the sequences differ.
- *
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/stl/BinaryPredicate.html">Input Iterator</a>.
- *
- *  \code
- *  #include <thrust/mismatch.h>
- *  #include <thrust/device_vector.h>
- *  ...
- *  thrust::device_vector<int> vec1(4);
- *  thrust::device_vector<int> vec2(4);
- *
- *  vec1[0] = 0;  vec2[0] = 0; 
- *  vec1[1] = 5;  vec2[1] = 5;
- *  vec1[2] = 3;  vec2[2] = 8;
- *  vec1[3] = 7;  vec2[3] = 7;
- *
- *  typedef thrust::device_vector<int>::iterator Iterator;
- *  thrust::pair<Iterator,Iterator> result;
- *
- *  result = thrust::mismatch(vec1.begin(), vec1.end(), vec2.begin(), thrust::equal_to<int>());
- *
- *  // result.first  is vec1.begin() + 2
- *  // result.second is vec2.begin() + 2
- *  \endcode
- *
- *  \see find
- *  \see find_if
- */
-template <typename InputIterator1, typename InputIterator2, typename BinaryPredicate>
-thrust::pair<InputIterator1, InputIterator2> mismatch(InputIterator1 first1,
-                                                      InputIterator1 last1,
-                                                      InputIterator2 first2,
-                                                      BinaryPredicate pred);
-
-/*! \} // end searching
- */
-
-} // end namespace thrust
-
-#include <thrust/detail/mismatch.inl>
-
diff --git a/compat/thrust/pair.h b/compat/thrust/pair.h
deleted file mode 100644
index 897cc078e0..0000000000
--- a/compat/thrust/pair.h
+++ /dev/null
@@ -1,283 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-/*! \file pair.h
- *  \brief A type encapsulating a heterogeneous pair of elements
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <utility>
-
-namespace thrust
-{
-
-/*! \addtogroup utility
- *  \{
- */
-
-/*! \addtogroup pair
- *  \{
- */
-
-/*! \p pair is a generic data structure encapsulating a heterogeneous
- *  pair of values.
- *
- *  \tparam T1 The type of \p pair's first object type.  There are no
- *          requirements on the type of \p T1. <tt>T1</tt>'s type is
- *          provided by <tt>pair::first_type</tt>.
- *
- *  \tparam T2 The type of \p pair's second object type.  There are no
- *          requirements on the type of \p T2. <tt>T2</tt>'s type is
- *          provided by <tt>pair::second_type</tt>.
- */
-template <typename T1, typename T2>
-  struct pair
-{
-  /*! \p first_type is the type of \p pair's first object type.
-   */
-  typedef T1 first_type;
-
-  /*! \p second_type is the type of \p pair's second object type.
-   */
-  typedef T2 second_type;
-
-  /*! The \p pair's first object.
-   */
-  first_type first;
-
-  /*! The \p pair's second object.
-   */
-  second_type second;
-
-  /*! \p pair's default constructor constructs \p first
-   *  and \p second using \c first_type & \c second_type's
-   *  default constructors, respectively.
-   */
-  __host__ __device__ pair(void);
-
-  /*! This constructor accepts two objects to copy into this \p pair.
-   *
-   *  \param x The object to copy into \p first.
-   *  \param y The object to copy into \p second.
-   */
-  inline __host__ __device__
-  pair(const T1 &x, const T2 &y);
-
-  /*! This copy constructor copies from a \p pair whose types are
-   *  convertible to this \p pair's \c first_type and \c second_type,
-   *  respectively.
-   *
-   *  \param p The \p pair to copy from.
-   *
-   *  \tparam U1 is convertible to \c first_type.
-   *  \tparam U2 is convertible to \c second_type.
-   */
-  template <typename U1, typename U2>
-  inline __host__ __device__
-  pair(const pair<U1,U2> &p);
-
-  /*! This copy constructor copies from a <tt>std::pair</tt> whose types are
-   *  convertible to this \p pair's \c first_type and \c second_type,
-   *  respectively.
-   *
-   *  \param p The <tt>std::pair</tt> to copy from.
-   *
-   *  \tparam U1 is convertible to \c first_type.
-   *  \tparam U2 is convertible to \c second_type.
-   */
-  template <typename U1, typename U2>
-  inline __host__ __device__
-  pair(const std::pair<U1,U2> &p);
-
-  /*! \p swap swaps the elements of two <tt>pair</tt>s.
-   *  
-   *  \param p The other <tt>pair</tt> with which to swap.
-   */
-  inline __host__ __device__
-  void swap(pair &p);
-}; // end pair
-
-
-/*! This operator tests two \p pairs for equality.
- *
- *  \param x The first \p pair to compare.
- *  \param y The second \p pair to compare.
- *  \return \c true if and only if <tt>x.first == y.first && x.second == y.second</tt>.
- *  
- *  \tparam T1 is a model of <a href="http://www.sgi.com/tech/stl/EqualityComparable.html">Equality Comparable</a>.
- *  \tparam T2 is a model of <a href="http://www.sgi.com/tech/stl/EqualityComparable.html">Equality Comparable</a>.
- */
-template <typename T1, typename T2>
-  inline __host__ __device__
-    bool operator==(const pair<T1,T2> &x, const pair<T1,T2> &y);
-
-
-/*! This operator tests two pairs for ascending ordering.
- *
- *  \param x The first \p pair to compare.
- *  \param y The second \p pair to compare.
- *  \return \c true if and only if <tt>x.first < y.first || (!(y.first < x.first) && x.second < y.second)</tt>.
- *
- *  \tparam T1 is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThan Comparable</a>.
- *  \tparam T2 is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThan Comparable</a>.
- */
-template <typename T1, typename T2>
-  inline __host__ __device__
-    bool operator<(const pair<T1,T2> &x, const pair<T1,T2> &y);
-
-
-/*! This operator tests two pairs for inequality.
- *
- *  \param x The first \p pair to compare.
- *  \param y The second \p pair to compare.
- *  \return \c true if and only if <tt>!(x == y)</tt>.
- *
- *  \tparam T1 is a model of <a href="http://www.sgi.com/tech/stl/EqualityComparable.html">Equality Comparable</a>.
- *  \tparam T2 is a model of <a href="http://www.sgi.com/tech/stl/EqualityComparable.html">Equality Comparable</a>.
- */
-template <typename T1, typename T2>
-  inline __host__ __device__
-    bool operator!=(const pair<T1,T2> &x, const pair<T1,T2> &y);
-
-
-/*! This operator tests two pairs for descending ordering.
- *
- *  \param x The first \p pair to compare.
- *  \param y The second \p pair to compare.
- *  \return \c true if and only if <tt>y < x</tt>.
- *
- *  \tparam T1 is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThan Comparable</a>.
- *  \tparam T2 is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThan Comparable</a>.
- */
-template <typename T1, typename T2>
-  inline __host__ __device__
-    bool operator>(const pair<T1,T2> &x, const pair<T1,T2> &y);
-
-
-/*! This operator tests two pairs for ascending ordering or equivalence.
- *
- *  \param x The first \p pair to compare.
- *  \param y The second \p pair to compare.
- *  \return \c true if and only if <tt>!(y < x)</tt>.
- *
- *  \tparam T1 is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThan Comparable</a>.
- *  \tparam T2 is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThan Comparable</a>.
- */
-template <typename T1, typename T2>
-  inline __host__ __device__
-    bool operator<=(const pair<T1,T2> &x, const pair<T1,T2> &y);
-
-
-/*! This operator tests two pairs for descending ordering or equivalence.
- *
- *  \param x The first \p pair to compare.
- *  \param y The second \p pair to compare.
- *  \return \c true if and only if <tt>!(x < y)</tt>.
- *
- *  \tparam T1 is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThan Comparable</a>.
- *  \tparam T2 is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThan Comparable</a>.
- */
-template <typename T1, typename T2>
-  inline __host__ __device__
-    bool operator>=(const pair<T1,T2> &x, const pair<T1,T2> &y);
-
-
-/*! \p swap swaps the contents of two <tt>pair</tt>s.
- *
- *  \param x The first \p pair to swap.
- *  \param y The second \p pair to swap.
- */
-template <typename T1, typename T2>
-  inline __host__ __device__
-    void swap(pair<T1,T2> &x, pair<T1,T2> &y);
-
-
-/*! This convenience function creates a \p pair from two objects.
- *
- *  \param x The first object to copy from.
- *  \param y The second object to copy from.
- *  \return A newly-constructed \p pair copied from \p a and \p b.
- *
- *  \tparam T1 There are no requirements on the type of \p T1.
- *  \tparam T2 There are no requirements on the type of \p T2.
- */
-template <typename T1, typename T2>
-  inline __host__ __device__
-    pair<T1,T2> make_pair(T1 x, T2 y);
-
-
-/*! This convenience metafunction is included for compatibility with
- *  \p tuple. It returns either the type of a \p pair's
- *  \c first_type or \c second_type in its nested type, \c type.
- *
- *  \tparam N This parameter selects the member of interest.
- *  \tparam T A \c pair type of interest.
- */
-template<int N, typename T> struct tuple_element;
-
-
-/*! This convenience metafunction is included for compatibility with
- *  \p tuple. It returns \c 2, the number of elements of a \p pair,
- *  in its nested data member, \c value.
- *
- *  \tparam Pair A \c pair type of interest.
- */
-template<typename Pair> struct tuple_size;
-
-
-/*! This convenience function returns a reference to either the first or
- *  second member of a \p pair.
- *
- *  \param p The \p pair of interest.
- *  \return \c p.first or \c p.second, depending on the template
- *          parameter.
- *
- *  \tparam N This parameter selects the member of interest.
- */
-// XXX comment out these prototypes as a WAR to a problem on MSVC 2005
-//template<unsigned int N, typename T1, typename T2>
-//  inline __host__ __device__
-//    typename tuple_element<N, pair<T1,T2> >::type &
-//      get(pair<T1,T2> &p);
-
-
-/*! This convenience function returns a const reference to either the
- *  first or second member of a \p pair.
- *
- *  \param p The \p pair of interest.
- *  \return \c p.first or \c p.second, depending on the template
- *          parameter.
- *
- *  \tparam i This parameter selects the member of interest.
- */
-// XXX comment out these prototypes as a WAR to a problem on MSVC 2005
-//template<int N, typename T1, typename T2>
-//  inline __host__ __device__
-//    const typename tuple_element<N, pair<T1,T2> >::type &
-//      get(const pair<T1,T2> &p);
-
-/*! \} // pair
- */
-
-/*! \} // utility
- */
-
-} // end thrust
-
-#include <thrust/detail/pair.inl>
-
diff --git a/compat/thrust/partition.h b/compat/thrust/partition.h
deleted file mode 100644
index 61a6278a8d..0000000000
--- a/compat/thrust/partition.h
+++ /dev/null
@@ -1,1429 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file partition.h
- *  \brief Reorganizes a range based on a predicate
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/execution_policy.h>
-#include <thrust/pair.h>
-
-namespace thrust
-{
-
-
-/*! \addtogroup reordering
- *  \ingroup algorithms
- *
- *  \addtogroup partitioning
- *  \ingroup reordering
- *  \{
- */
-
-
-/*! \p partition reorders the elements <tt>[first, last)</tt> based on the function
- *  object \p pred, such that all of the elements that satisfy \p pred precede the
- *  elements that fail to satisfy it. The postcondition is that, for some iterator
- *  \c middle in the range <tt>[first, last)</tt>, <tt>pred(*i)</tt> is \c true for every
- *  iterator \c i in the range <tt>[first,middle)</tt> and \c false for every iterator
- *  \c i in the range <tt>[middle, last)</tt>. The return value of \p partition is
- *  \c middle.
- *
- *  Note that the relative order of elements in the two reordered sequences is not
- *  necessarily the same as it was in the original sequence. A different algorithm,
- *  \ref stable_partition, does guarantee to preserve the relative order.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the sequence to reorder.
- *  \param last The end of the sequence to reorder.
- *  \param pred A function object which decides to which partition each element of the
- *              sequence <tt>[first, last)</tt> belongs.
- *  \return An iterator referring to the first element of the second partition, that is,
- *          the sequence of the elements which do not satisfy \p pred.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \p ForwardIterator's \c value_type is convertible to \p Predicate's \c argument_type,
- *          and \p ForwardIterator is mutable.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *
- *  The following code snippet demonstrates how to use \p partition to reorder a
- *  sequence so that even numbers precede odd numbers using the \p thrust::host execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/partition.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  struct is_even
- *  {
- *    __host__ __device__
- *    bool operator()(const int &x)
- *    {
- *      return (x % 2) == 0;
- *    }
- *  };
- *  ...
- *  int A[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
- *  const int N = sizeof(A)/sizeof(int);
- *  thrust::partition(thrust::host,
- *                    A, A + N,
- *                    is_even());
- *  // A is now {2, 4, 6, 8, 10, 1, 3, 5, 7, 9}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/partition.html
- *  \see \p stable_partition
- *  \see \p partition_copy
- */
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename Predicate>
-  ForwardIterator partition(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                            ForwardIterator first,
-                            ForwardIterator last,
-                            Predicate pred);
-
-
-/*! \p partition reorders the elements <tt>[first, last)</tt> based on the function
- *  object \p pred, such that all of the elements that satisfy \p pred precede the
- *  elements that fail to satisfy it. The postcondition is that, for some iterator
- *  \c middle in the range <tt>[first, last)</tt>, <tt>pred(*i)</tt> is \c true for every
- *  iterator \c i in the range <tt>[first,middle)</tt> and \c false for every iterator
- *  \c i in the range <tt>[middle, last)</tt>. The return value of \p partition is
- *  \c middle.
- *
- *  Note that the relative order of elements in the two reordered sequences is not
- *  necessarily the same as it was in the original sequence. A different algorithm,
- *  \ref stable_partition, does guarantee to preserve the relative order.
- *
- *  \param first The beginning of the sequence to reorder.
- *  \param last The end of the sequence to reorder.
- *  \param pred A function object which decides to which partition each element of the
- *              sequence <tt>[first, last)</tt> belongs.
- *  \return An iterator referring to the first element of the second partition, that is,
- *          the sequence of the elements which do not satisfy \p pred.
- *
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \p ForwardIterator's \c value_type is convertible to \p Predicate's \c argument_type,
- *          and \p ForwardIterator is mutable.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *
- *  The following code snippet demonstrates how to use \p partition to reorder a
- *  sequence so that even numbers precede odd numbers.
- *
- *  \code
- *  #include <thrust/partition.h>
- *  ...
- *  struct is_even
- *  {
- *    __host__ __device__
- *    bool operator()(const int &x)
- *    {
- *      return (x % 2) == 0;
- *    }
- *  };
- *  ...
- *  int A[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
- *  const int N = sizeof(A)/sizeof(int);
- *  thrust::partition(A, A + N,
- *                     is_even());
- *  // A is now {2, 4, 6, 8, 10, 1, 3, 5, 7, 9}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/partition.html
- *  \see \p stable_partition
- *  \see \p partition_copy
- */
-template<typename ForwardIterator,
-         typename Predicate>
-  ForwardIterator partition(ForwardIterator first,
-                            ForwardIterator last,
-                            Predicate pred);
-
-
-/*! \p partition reorders the elements <tt>[first, last)</tt> based on the function
- *  object \p pred applied to a stencil range <tt>[stencil, stencil + (last - first))</tt>,
- *  such that all of the elements whose corresponding stencil element satisfies \p pred precede all of the elements whose
- *  corresponding stencil element fails to satisfy it. The postcondition is that, for some iterator
- *  \c middle in the range <tt>[first, last)</tt>, <tt>pred(*stencil_i)</tt> is \c true for every iterator
- *  \c stencil_i in the range <tt>[stencil,stencil + (middle - first))</tt> and \c false for every iterator \c stencil_i
- *  in the range <tt>[stencil + (middle - first), stencil + (last - first))</tt>.
- *  The return value of \p stable_partition is \c middle.
- *
- *  Note that the relative order of elements in the two reordered sequences is not
- *  necessarily the same as it was in the original sequence. A different algorithm,
- *  \ref stable_partition, does guarantee to preserve the relative order.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the sequence to reorder.
- *  \param last The end of the sequence to reorder.
- *  \param stencil The beginning of the stencil sequence.
- *  \param pred A function object which decides to which partition each element of the
- *              sequence <tt>[first, last)</tt> belongs.
- *  \return An iterator referring to the first element of the second partition, that is,
- *          the sequence of the elements whose stencil elements do not satisfy \p pred.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \p ForwardIterator is mutable.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *
- *  \pre The ranges <tt>[first,last)</tt> and <tt>[stencil, stencil + (last - first))</tt> shall not overlap.
- *
- *  The following code snippet demonstrates how to use \p partition to reorder a
- *  sequence so that even numbers precede odd numbers using the \p thrust::host execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/partition.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  struct is_even
- *  {
- *    __host__ __device__
- *    bool operator()(const int &x)
- *    {
- *      return (x % 2) == 0;
- *    }
- *  };
- *  ...
- *  int A[] = {0, 1, 0, 1, 0, 1, 0, 1, 0,  1};
- *  int S[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
- *  const int N = sizeof(A)/sizeof(int);
- *  thrust::partition(thrust::host, A, A + N, S, is_even());
- *  // A is now {1, 1, 1, 1, 1, 0, 0, 0, 0, 0}
- *  // S is unmodified
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/partition.html
- *  \see \p stable_partition
- *  \see \p partition_copy
- */
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename InputIterator,
-         typename Predicate>
-  ForwardIterator partition(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                            ForwardIterator first,
-                            ForwardIterator last,
-                            InputIterator stencil,
-                            Predicate pred);
-
-
-/*! \p partition reorders the elements <tt>[first, last)</tt> based on the function
- *  object \p pred applied to a stencil range <tt>[stencil, stencil + (last - first))</tt>,
- *  such that all of the elements whose corresponding stencil element satisfies \p pred precede all of the elements whose
- *  corresponding stencil element fails to satisfy it. The postcondition is that, for some iterator
- *  \c middle in the range <tt>[first, last)</tt>, <tt>pred(*stencil_i)</tt> is \c true for every iterator
- *  \c stencil_i in the range <tt>[stencil,stencil + (middle - first))</tt> and \c false for every iterator \c stencil_i
- *  in the range <tt>[stencil + (middle - first), stencil + (last - first))</tt>.
- *  The return value of \p stable_partition is \c middle.
- *
- *  Note that the relative order of elements in the two reordered sequences is not
- *  necessarily the same as it was in the original sequence. A different algorithm,
- *  \ref stable_partition, does guarantee to preserve the relative order.
- *
- *  \param first The beginning of the sequence to reorder.
- *  \param last The end of the sequence to reorder.
- *  \param stencil The beginning of the stencil sequence.
- *  \param pred A function object which decides to which partition each element of the
- *              sequence <tt>[first, last)</tt> belongs.
- *  \return An iterator referring to the first element of the second partition, that is,
- *          the sequence of the elements whose stencil elements do not satisfy \p pred.
- *
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \p ForwardIterator is mutable.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *
- *  \pre The ranges <tt>[first,last)</tt> and <tt>[stencil, stencil + (last - first))</tt> shall not overlap.
- *
- *  The following code snippet demonstrates how to use \p partition to reorder a
- *  sequence so that even numbers precede odd numbers.
- *
- *  \code
- *  #include <thrust/partition.h>
- *  ...
- *  struct is_even
- *  {
- *    __host__ __device__
- *    bool operator()(const int &x)
- *    {
- *      return (x % 2) == 0;
- *    }
- *  };
- *  ...
- *  int A[] = {0, 1, 0, 1, 0, 1, 0, 1, 0,  1};
- *  int S[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
- *  const int N = sizeof(A)/sizeof(int);
- *  thrust::partition(A, A + N, S, is_even());
- *  // A is now {1, 1, 1, 1, 1, 0, 0, 0, 0, 0}
- *  // S is unmodified
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/partition.html
- *  \see \p stable_partition
- *  \see \p partition_copy
- */
-template<typename ForwardIterator,
-         typename InputIterator,
-         typename Predicate>
-  ForwardIterator partition(ForwardIterator first,
-                            ForwardIterator last,
-                            InputIterator stencil,
-                            Predicate pred);
-
-
-/*! \p partition_copy differs from \ref partition only in that the reordered
- *  sequence is written to difference output sequences, rather than in place.
- *
- *  \p partition_copy copies the elements <tt>[first, last)</tt> based on the
- *  function object \p pred. All of the elements that satisfy \p pred are copied
- *  to the range beginning at \p out_true and all the elements that fail to satisfy it
- *  are copied to the range beginning at \p out_false.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the sequence to reorder.
- *  \param last The end of the sequence to reorder.
- *  \param out_true The destination of the resulting sequence of elements which satisfy \p pred.
- *  \param out_false The destination of the resulting sequence of elements which fail to satisfy \p pred.
- *  \param pred A function object which decides to which partition each element of the
- *              sequence <tt>[first, last)</tt> belongs.
- *  \return A \p pair p such that <tt>p.first</tt> is the end of the output range beginning
- *          at \p out_true and <tt>p.second</tt> is the end of the output range beginning at
- *          \p out_false.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type and \p InputIterator's \c value_type
- *          is convertible to \p OutputIterator1 and \p OutputIterator2's \c value_types.
- *  \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *
- *  \pre The input range shall not overlap with either output range.
- *
- *  The following code snippet demonstrates how to use \p partition_copy to separate a
- *  sequence into two output sequences of even and odd numbers using the \p thrust::host execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/partition.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  struct is_even
- *  {
- *    __host__ __device__
- *    bool operator()(const int &x)
- *    {
- *      return (x % 2) == 0;
- *    }
- *  };
- *  ...
- *  int A[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
- *  int result[10];
- *  const int N = sizeof(A)/sizeof(int);
- *  int *evens = result;
- *  int *odds  = result + 5;
- *  thrust::partition_copy(thrust::host, A, A + N, evens, odds, is_even());
- *  // A remains {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
- *  // result is now {2, 4, 6, 8, 10, 1, 3, 5, 7, 9}
- *  // evens points to {2, 4, 6, 8, 10}
- *  // odds points to {1, 3, 5, 7, 9}
- *  \endcode
- *
- *  \note The relative order of elements in the two reordered sequences is not
- *  necessarily the same as it was in the original sequence. A different algorithm,
- *  \ref stable_partition_copy, does guarantee to preserve the relative order.
- *
- *  \see http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2569.pdf
- *  \see \p stable_partition_copy
- *  \see \p partition
- */
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename Predicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    partition_copy(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                   InputIterator first,
-                   InputIterator last,
-                   OutputIterator1 out_true,
-                   OutputIterator2 out_false,
-                   Predicate pred);
-
-
-/*! \p partition_copy differs from \ref partition only in that the reordered
- *  sequence is written to difference output sequences, rather than in place.
- *
- *  \p partition_copy copies the elements <tt>[first, last)</tt> based on the
- *  function object \p pred. All of the elements that satisfy \p pred are copied
- *  to the range beginning at \p out_true and all the elements that fail to satisfy it
- *  are copied to the range beginning at \p out_false.
- *
- *  \param first The beginning of the sequence to reorder.
- *  \param last The end of the sequence to reorder.
- *  \param out_true The destination of the resulting sequence of elements which satisfy \p pred.
- *  \param out_false The destination of the resulting sequence of elements which fail to satisfy \p pred.
- *  \param pred A function object which decides to which partition each element of the
- *              sequence <tt>[first, last)</tt> belongs.
- *  \return A \p pair p such that <tt>p.first</tt> is the end of the output range beginning
- *          at \p out_true and <tt>p.second</tt> is the end of the output range beginning at
- *          \p out_false.
- *
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type and \p InputIterator's \c value_type
- *          is convertible to \p OutputIterator1 and \p OutputIterator2's \c value_types.
- *  \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *
- *  \pre The input range shall not overlap with either output range.
- *
- *  The following code snippet demonstrates how to use \p partition_copy to separate a
- *  sequence into two output sequences of even and odd numbers.
- *
- *  \code
- *  #include <thrust/partition.h>
- *  ...
- *  struct is_even
- *  {
- *    __host__ __device__
- *    bool operator()(const int &x)
- *    {
- *      return (x % 2) == 0;
- *    }
- *  };
- *  ...
- *  int A[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
- *  int result[10];
- *  const int N = sizeof(A)/sizeof(int);
- *  int *evens = result;
- *  int *odds  = result + 5;
- *  thrust::partition_copy(A, A + N, evens, odds, is_even());
- *  // A remains {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
- *  // result is now {2, 4, 6, 8, 10, 1, 3, 5, 7, 9}
- *  // evens points to {2, 4, 6, 8, 10}
- *  // odds points to {1, 3, 5, 7, 9}
- *  \endcode
- *
- *  \note The relative order of elements in the two reordered sequences is not
- *  necessarily the same as it was in the original sequence. A different algorithm,
- *  \ref stable_partition_copy, does guarantee to preserve the relative order.
- *
- *  \see http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2569.pdf
- *  \see \p stable_partition_copy
- *  \see \p partition
- */
-template<typename InputIterator,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename Predicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    partition_copy(InputIterator first,
-                   InputIterator last,
-                   OutputIterator1 out_true,
-                   OutputIterator2 out_false,
-                   Predicate pred);
-
-
-/*! \p partition_copy differs from \ref partition only in that the reordered
- *  sequence is written to difference output sequences, rather than in place.
- *
- *  \p partition_copy copies the elements <tt>[first, last)</tt> based on the
- *  function object \p pred which is applied to a range of stencil elements. All of the elements
- *  whose corresponding stencil element satisfies \p pred are copied to the range beginning at \p out_true
- *  and all the elements whose stencil element fails to satisfy it are copied to the range beginning
- *  at \p out_false.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the sequence to reorder.
- *  \param last The end of the sequence to reorder.
- *  \param stencil The beginning of the stencil sequence.
- *  \param out_true The destination of the resulting sequence of elements which satisfy \p pred.
- *  \param out_false The destination of the resulting sequence of elements which fail to satisfy \p pred.
- *  \param pred A function object which decides to which partition each element of the
- *              sequence <tt>[first, last)</tt> belongs.
- *  \return A \p pair p such that <tt>p.first</tt> is the end of the output range beginning
- *          at \p out_true and <tt>p.second</tt> is the end of the output range beginning at
- *          \p out_false.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator's \c value_type is convertible to \p OutputIterator1 and \p OutputIterator2's \c value_types.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator2's \c value_type is convertible to \p Predicate's \c argument_type.
- *  \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *
- *  \pre The input ranges shall not overlap with either output range.
- *
- *  The following code snippet demonstrates how to use \p partition_copy to separate a
- *  sequence into two output sequences of even and odd numbers using the \p thrust::host execution
- *  policy for parallelization.
- *
- *  \code
- *  #include <thrust/partition.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  int A[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
- *  int S[] = {0, 1, 0, 1, 0, 1, 0, 1, 0,  1};
- *  int result[10];
- *  const int N = sizeof(A)/sizeof(int);
- *  int *evens = result;
- *  int *odds  = result + 5;
- *  thrust::stable_partition_copy(thrust::host, A, A + N, S, evens, odds, thrust::identity<int>());
- *  // A remains {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
- *  // S remains {0, 1, 0, 1, 0, 1, 0, 1, 0,  1}
- *  // result is now {2, 4, 6, 8, 10, 1, 3, 5, 7, 9}
- *  // evens points to {2, 4, 6, 8, 10}
- *  // odds points to {1, 3, 5, 7, 9}
- *  \endcode
- *
- *  \note The relative order of elements in the two reordered sequences is not
- *  necessarily the same as it was in the original sequence. A different algorithm,
- *  \ref stable_partition_copy, does guarantee to preserve the relative order.
- *
- *  \see http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2569.pdf
- *  \see \p stable_partition_copy
- *  \see \p partition
- */
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename Predicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    partition_copy(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                   InputIterator1 first,
-                   InputIterator1 last,
-                   InputIterator2 stencil,
-                   OutputIterator1 out_true,
-                   OutputIterator2 out_false,
-                   Predicate pred);
-
-
-/*! \p partition_copy differs from \ref partition only in that the reordered
- *  sequence is written to difference output sequences, rather than in place.
- *
- *  \p partition_copy copies the elements <tt>[first, last)</tt> based on the
- *  function object \p pred which is applied to a range of stencil elements. All of the elements
- *  whose corresponding stencil element satisfies \p pred are copied to the range beginning at \p out_true
- *  and all the elements whose stencil element fails to satisfy it are copied to the range beginning
- *  at \p out_false.
- *
- *  \param first The beginning of the sequence to reorder.
- *  \param last The end of the sequence to reorder.
- *  \param stencil The beginning of the stencil sequence.
- *  \param out_true The destination of the resulting sequence of elements which satisfy \p pred.
- *  \param out_false The destination of the resulting sequence of elements which fail to satisfy \p pred.
- *  \param pred A function object which decides to which partition each element of the
- *              sequence <tt>[first, last)</tt> belongs.
- *  \return A \p pair p such that <tt>p.first</tt> is the end of the output range beginning
- *          at \p out_true and <tt>p.second</tt> is the end of the output range beginning at
- *          \p out_false.
- *
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator's \c value_type is convertible to \p OutputIterator1 and \p OutputIterator2's \c value_types.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator2's \c value_type is convertible to \p Predicate's \c argument_type.
- *  \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *
- *  \pre The input ranges shall not overlap with either output range.
- *
- *  The following code snippet demonstrates how to use \p partition_copy to separate a
- *  sequence into two output sequences of even and odd numbers.
- *
- *  \code
- *  #include <thrust/partition.h>
- *  #include <thrust/functional.h>
- *  ...
- *  int A[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
- *  int S[] = {0, 1, 0, 1, 0, 1, 0, 1, 0,  1};
- *  int result[10];
- *  const int N = sizeof(A)/sizeof(int);
- *  int *evens = result;
- *  int *odds  = result + 5;
- *  thrust::stable_partition_copy(A, A + N, S, evens, odds, thrust::identity<int>());
- *  // A remains {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
- *  // S remains {0, 1, 0, 1, 0, 1, 0, 1, 0,  1}
- *  // result is now {2, 4, 6, 8, 10, 1, 3, 5, 7, 9}
- *  // evens points to {2, 4, 6, 8, 10}
- *  // odds points to {1, 3, 5, 7, 9}
- *  \endcode
- *
- *  \note The relative order of elements in the two reordered sequences is not
- *  necessarily the same as it was in the original sequence. A different algorithm,
- *  \ref stable_partition_copy, does guarantee to preserve the relative order.
- *
- *  \see http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2569.pdf
- *  \see \p stable_partition_copy
- *  \see \p partition
- */
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename Predicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    partition_copy(InputIterator1 first,
-                   InputIterator1 last,
-                   InputIterator2 stencil,
-                   OutputIterator1 out_true,
-                   OutputIterator2 out_false,
-                   Predicate pred);
-
-
-/*! \p stable_partition is much like \ref partition : it reorders the elements in the
- *  range <tt>[first, last)</tt> based on the function object \p pred, such that all of
- *  the elements that satisfy \p pred precede all of the elements that fail to satisfy
- *  it. The postcondition is that, for some iterator \p middle in the range
- *  <tt>[first, last)</tt>, <tt>pred(*i)</tt> is \c true for every iterator \c i in the
- *  range <tt>[first,middle)</tt> and \c false for every iterator \c i in the range
- *  <tt>[middle, last)</tt>. The return value of \p stable_partition is \c middle.
- *
- *  \p stable_partition differs from \ref partition in that \p stable_partition is
- *  guaranteed to preserve relative order. That is, if \c x and \c y are elements in
- *  <tt>[first, last)</tt>, and \c stencil_x and \c stencil_y are the stencil elements
- *  in corresponding positions within <tt>[stencil, stencil + (last - first))</tt>,
- *  and <tt>pred(stencil_x) == pred(stencil_y)</tt>, and if \c x precedes
- *  \c y, then it will still be true after \p stable_partition that \c x precedes \c y.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The first element of the sequence to reorder.
- *  \param last One position past the last element of the sequence to reorder.
- *  \param pred A function object which decides to which partition each element of the
- *              sequence <tt>[first, last)</tt> belongs.
- *  \return An iterator referring to the first element of the second partition, that is,
- *          the sequence of the elements which do not satisfy pred.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \p ForwardIterator's \c value_type is convertible to \p Predicate's \c argument_type,
- *          and \p ForwardIterator is mutable.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *
- *  The following code snippet demonstrates how to use \p stable_partition to reorder a
- *  sequence so that even numbers precede odd numbers using the \p thrust::host execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/partition.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  struct is_even
- *  {
- *    __host__ __device__
- *    bool operator()(const int &x)
- *    {
- *      return (x % 2) == 0;
- *    }
- *  };
- *  ...
- *  int A[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
- *  const int N = sizeof(A)/sizeof(int);
- *  thrust::stable_partition(thrust::host,
- *                           A, A + N,
- *                           is_even());
- *  // A is now {2, 4, 6, 8, 10, 1, 3, 5, 7, 9}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/stable_partition.html
- *  \see \p partition
- *  \see \p stable_partition_copy
- */
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename Predicate>
-  ForwardIterator stable_partition(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                   ForwardIterator first,
-                                   ForwardIterator last,
-                                   Predicate pred);
-
-
-/*! \p stable_partition is much like \ref partition : it reorders the elements in the
- *  range <tt>[first, last)</tt> based on the function object \p pred, such that all of
- *  the elements that satisfy \p pred precede all of the elements that fail to satisfy
- *  it. The postcondition is that, for some iterator \p middle in the range
- *  <tt>[first, last)</tt>, <tt>pred(*i)</tt> is \c true for every iterator \c i in the
- *  range <tt>[first,middle)</tt> and \c false for every iterator \c i in the range
- *  <tt>[middle, last)</tt>. The return value of \p stable_partition is \c middle.
- *
- *  \p stable_partition differs from \ref partition in that \p stable_partition is
- *  guaranteed to preserve relative order. That is, if \c x and \c y are elements in
- *  <tt>[first, last)</tt>, and \c stencil_x and \c stencil_y are the stencil elements
- *  in corresponding positions within <tt>[stencil, stencil + (last - first))</tt>,
- *  and <tt>pred(stencil_x) == pred(stencil_y)</tt>, and if \c x precedes
- *  \c y, then it will still be true after \p stable_partition that \c x precedes \c y.
- *
- *  \param first The first element of the sequence to reorder.
- *  \param last One position past the last element of the sequence to reorder.
- *  \param pred A function object which decides to which partition each element of the
- *              sequence <tt>[first, last)</tt> belongs.
- *  \return An iterator referring to the first element of the second partition, that is,
- *          the sequence of the elements which do not satisfy pred.
- *
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \p ForwardIterator's \c value_type is convertible to \p Predicate's \c argument_type,
- *          and \p ForwardIterator is mutable.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *
- *  The following code snippet demonstrates how to use \p stable_partition to reorder a
- *  sequence so that even numbers precede odd numbers.
- *
- *  \code
- *  #include <thrust/partition.h>
- *  ...
- *  struct is_even
- *  {
- *    __host__ __device__
- *    bool operator()(const int &x)
- *    {
- *      return (x % 2) == 0;
- *    }
- *  };
- *  ...
- *  int A[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
- *  const int N = sizeof(A)/sizeof(int);
- *  thrust::stable_partition(A, A + N,
- *                            is_even());
- *  // A is now {2, 4, 6, 8, 10, 1, 3, 5, 7, 9}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/stable_partition.html
- *  \see \p partition
- *  \see \p stable_partition_copy
- */
-template<typename ForwardIterator,
-         typename Predicate>
-  ForwardIterator stable_partition(ForwardIterator first,
-                                   ForwardIterator last,
-                                   Predicate pred);
-
-
-/*! \p stable_partition is much like \p partition: it reorders the elements in the
- *  range <tt>[first, last)</tt> based on the function object \p pred applied to a stencil
- *  range <tt>[stencil, stencil + (last - first))</tt>, such that all of
- *  the elements whose corresponding stencil element satisfies \p pred precede all of the elements whose
- *  corresponding stencil element fails to satisfy it. The postcondition is that, for some iterator
- *  \c middle in the range <tt>[first, last)</tt>, <tt>pred(*stencil_i)</tt> is \c true for every iterator
- *  \c stencil_i in the range <tt>[stencil,stencil + (middle - first))</tt> and \c false for every iterator \c stencil_i
- *  in the range <tt>[stencil + (middle - first), stencil + (last - first))</tt>.
- *  The return value of \p stable_partition is \c middle.
- *
- *  \p stable_partition differs from \ref partition in that \p stable_partition is
- *  guaranteed to preserve relative order. That is, if \c x and \c y are elements in
- *  <tt>[first, last)</tt>, such that <tt>pred(x) == pred(y)</tt>, and if \c x precedes
- *  \c y, then it will still be true after \p stable_partition that \c x precedes \c y.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The first element of the sequence to reorder.
- *  \param last One position past the last element of the sequence to reorder.
- *  \param stencil The beginning of the stencil sequence.
- *  \param pred A function object which decides to which partition each element of the
- *              sequence <tt>[first, last)</tt> belongs.
- *  \return An iterator referring to the first element of the second partition, that is,
- *          the sequence of the elements whose stencil elements do not satisfy \p pred.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \p ForwardIterator is mutable.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *
- *  \pre The range <tt>[first, last)</tt> shall not overlap with the range <tt>[stencil, stencil + (last - first))</tt>.
- *
- *  The following code snippet demonstrates how to use \p stable_partition to reorder a
- *  sequence so that even numbers precede odd numbers using the \p thrust::host execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/partition.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  struct is_even
- *  {
- *    __host__ __device__
- *    bool operator()(const int &x)
- *    {
- *      return (x % 2) == 0;
- *    }
- *  };
- *  ...
- *  int A[] = {0, 1, 0, 1, 0, 1, 0, 1, 0,  1};
- *  int S[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
- *  const int N = sizeof(A)/sizeof(int);
- *  thrust::stable_partition(thrust::host, A, A + N, S, is_even());
- *  // A is now {1, 1, 1, 1, 1, 0, 0, 0, 0, 0}
- *  // S is unmodified
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/stable_partition.html
- *  \see \p partition
- *  \see \p stable_partition_copy
- */
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename InputIterator,
-         typename Predicate>
-  ForwardIterator stable_partition(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                   ForwardIterator first,
-                                   ForwardIterator last,
-                                   InputIterator stencil,
-                                   Predicate pred);
-
-
-/*! \p stable_partition is much like \p partition: it reorders the elements in the
- *  range <tt>[first, last)</tt> based on the function object \p pred applied to a stencil
- *  range <tt>[stencil, stencil + (last - first))</tt>, such that all of
- *  the elements whose corresponding stencil element satisfies \p pred precede all of the elements whose
- *  corresponding stencil element fails to satisfy it. The postcondition is that, for some iterator
- *  \c middle in the range <tt>[first, last)</tt>, <tt>pred(*stencil_i)</tt> is \c true for every iterator
- *  \c stencil_i in the range <tt>[stencil,stencil + (middle - first))</tt> and \c false for every iterator \c stencil_i
- *  in the range <tt>[stencil + (middle - first), stencil + (last - first))</tt>.
- *  The return value of \p stable_partition is \c middle.
- *
- *  \p stable_partition differs from \ref partition in that \p stable_partition is
- *  guaranteed to preserve relative order. That is, if \c x and \c y are elements in
- *  <tt>[first, last)</tt>, such that <tt>pred(x) == pred(y)</tt>, and if \c x precedes
- *  \c y, then it will still be true after \p stable_partition that \c x precedes \c y.
- *
- *  \param first The first element of the sequence to reorder.
- *  \param last One position past the last element of the sequence to reorder.
- *  \param stencil The beginning of the stencil sequence.
- *  \param pred A function object which decides to which partition each element of the
- *              sequence <tt>[first, last)</tt> belongs.
- *  \return An iterator referring to the first element of the second partition, that is,
- *          the sequence of the elements whose stencil elements do not satisfy \p pred.
- *
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \p ForwardIterator is mutable.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *
- *  \pre The range <tt>[first, last)</tt> shall not overlap with the range <tt>[stencil, stencil + (last - first))</tt>.
- *
- *  The following code snippet demonstrates how to use \p stable_partition to reorder a
- *  sequence so that even numbers precede odd numbers.
- *
- *  \code
- *  #include <thrust/partition.h>
- *  ...
- *  struct is_even
- *  {
- *    __host__ __device__
- *    bool operator()(const int &x)
- *    {
- *      return (x % 2) == 0;
- *    }
- *  };
- *  ...
- *  int A[] = {0, 1, 0, 1, 0, 1, 0, 1, 0,  1};
- *  int S[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
- *  const int N = sizeof(A)/sizeof(int);
- *  thrust::stable_partition(A, A + N, S, is_even());
- *  // A is now {1, 1, 1, 1, 1, 0, 0, 0, 0, 0}
- *  // S is unmodified
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/stable_partition.html
- *  \see \p partition
- *  \see \p stable_partition_copy
- */
-template<typename ForwardIterator,
-         typename InputIterator,
-         typename Predicate>
-  ForwardIterator stable_partition(ForwardIterator first,
-                                   ForwardIterator last,
-                                   InputIterator stencil,
-                                   Predicate pred);
-
-
-/*! \p stable_partition_copy differs from \ref stable_partition only in that the reordered
- *  sequence is written to different output sequences, rather than in place.
- *
- *  \p stable_partition_copy copies the elements <tt>[first, last)</tt> based on the
- *  function object \p pred. All of the elements that satisfy \p pred are copied
- *  to the range beginning at \p out_true and all the elements that fail to satisfy it
- *  are copied to the range beginning at \p out_false.
- *
- *  \p stable_partition_copy differs from \ref partition_copy in that
- *  \p stable_partition_copy is guaranteed to preserve relative order. That is, if
- *  \c x and \c y are elements in <tt>[first, last)</tt>, such that
- *  <tt>pred(x) == pred(y)</tt>, and if \c x precedes \c y, then it will still be true
- *  after \p stable_partition_copy that \c x precedes \c y in the output.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The first element of the sequence to reorder.
- *  \param last One position past the last element of the sequence to reorder.
- *  \param out_true The destination of the resulting sequence of elements which satisfy \p pred.
- *  \param out_false The destination of the resulting sequence of elements which fail to satisfy \p pred.
- *  \param pred A function object which decides to which partition each element of the
- *              sequence <tt>[first, last)</tt> belongs.
- *  \return A \p pair p such that <tt>p.first</tt> is the end of the output range beginning
- *          at \p out_true and <tt>p.second</tt> is the end of the output range beginning at
- *          \p out_false.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type and \p InputIterator's \c value_type
- *          is convertible to \p OutputIterator1 and \p OutputIterator2's \c value_types.
- *  \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *
- *  \pre The input ranges shall not overlap with either output range.
- *
- *  The following code snippet demonstrates how to use \p stable_partition_copy to
- *  reorder a sequence so that even numbers precede odd numbers using the \p thrust::host execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/partition.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  struct is_even
- *  {
- *    __host__ __device__
- *    bool operator()(const int &x)
- *    {
- *      return (x % 2) == 0;
- *    }
- *  };
- *  ...
- *  int A[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
- *  int result[10];
- *  const int N = sizeof(A)/sizeof(int);
- *  int *evens = result;
- *  int *odds  = result + 5;
- *  thrust::stable_partition_copy(thrust::host, A, A + N, evens, odds, is_even());
- *  // A remains {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
- *  // result is now {2, 4, 6, 8, 10, 1, 3, 5, 7, 9}
- *  // evens points to {2, 4, 6, 8, 10}
- *  // odds points to {1, 3, 5, 7, 9}
- *  \endcode
- *
- *  \see http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2569.pdf
- *  \see \p partition_copy
- *  \see \p stable_partition
- */
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename Predicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    stable_partition_copy(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                          InputIterator first,
-                          InputIterator last,
-                          OutputIterator1 out_true,
-                          OutputIterator2 out_false,
-                          Predicate pred);
-
-
-/*! \p stable_partition_copy differs from \ref stable_partition only in that the reordered
- *  sequence is written to different output sequences, rather than in place.
- *
- *  \p stable_partition_copy copies the elements <tt>[first, last)</tt> based on the
- *  function object \p pred. All of the elements that satisfy \p pred are copied
- *  to the range beginning at \p out_true and all the elements that fail to satisfy it
- *  are copied to the range beginning at \p out_false.
- *
- *  \p stable_partition_copy differs from \ref partition_copy in that
- *  \p stable_partition_copy is guaranteed to preserve relative order. That is, if
- *  \c x and \c y are elements in <tt>[first, last)</tt>, such that
- *  <tt>pred(x) == pred(y)</tt>, and if \c x precedes \c y, then it will still be true
- *  after \p stable_partition_copy that \c x precedes \c y in the output.
- *
- *  \param first The first element of the sequence to reorder.
- *  \param last One position past the last element of the sequence to reorder.
- *  \param out_true The destination of the resulting sequence of elements which satisfy \p pred.
- *  \param out_false The destination of the resulting sequence of elements which fail to satisfy \p pred.
- *  \param pred A function object which decides to which partition each element of the
- *              sequence <tt>[first, last)</tt> belongs.
- *  \return A \p pair p such that <tt>p.first</tt> is the end of the output range beginning
- *          at \p out_true and <tt>p.second</tt> is the end of the output range beginning at
- *          \p out_false.
- *
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type and \p InputIterator's \c value_type
- *          is convertible to \p OutputIterator1 and \p OutputIterator2's \c value_types.
- *  \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *
- *  \pre The input ranges shall not overlap with either output range.
- *
- *  The following code snippet demonstrates how to use \p stable_partition_copy to
- *  reorder a sequence so that even numbers precede odd numbers.
- *
- *  \code
- *  #include <thrust/partition.h>
- *  ...
- *  struct is_even
- *  {
- *    __host__ __device__
- *    bool operator()(const int &x)
- *    {
- *      return (x % 2) == 0;
- *    }
- *  };
- *  ...
- *  int A[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
- *  int result[10];
- *  const int N = sizeof(A)/sizeof(int);
- *  int *evens = result;
- *  int *odds  = result + 5;
- *  thrust::stable_partition_copy(A, A + N, evens, odds, is_even());
- *  // A remains {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
- *  // result is now {2, 4, 6, 8, 10, 1, 3, 5, 7, 9}
- *  // evens points to {2, 4, 6, 8, 10}
- *  // odds points to {1, 3, 5, 7, 9}
- *  \endcode
- *
- *  \see http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2569.pdf
- *  \see \p partition_copy
- *  \see \p stable_partition
- */
-template<typename InputIterator,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename Predicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    stable_partition_copy(InputIterator first,
-                          InputIterator last,
-                          OutputIterator1 out_true,
-                          OutputIterator2 out_false,
-                          Predicate pred);
-
-
-/*! \p stable_partition_copy differs from \ref stable_partition only in that the reordered
- *  sequence is written to different output sequences, rather than in place.
- *
- *  \p stable_partition_copy copies the elements <tt>[first, last)</tt> based on the
- *  function object \p pred which is applied to a range of stencil elements. All of the elements
- *  whose corresponding stencil element satisfies \p pred are copied to the range beginning at \p out_true
- *  and all the elements whose stencil element fails to satisfy it are copied to the range beginning
- *  at \p out_false.
- *
- *  \p stable_partition_copy differs from \ref partition_copy in that
- *  \p stable_partition_copy is guaranteed to preserve relative order. That is, if
- *  \c x and \c y are elements in <tt>[first, last)</tt>, such that
- *  <tt>pred(x) == pred(y)</tt>, and if \c x precedes \c y, then it will still be true
- *  after \p stable_partition_copy that \c x precedes \c y in the output.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The first element of the sequence to reorder.
- *  \param last One position past the last element of the sequence to reorder.
- *  \param stencil The beginning of the stencil sequence.
- *  \param out_true The destination of the resulting sequence of elements which satisfy \p pred.
- *  \param out_false The destination of the resulting sequence of elements which fail to satisfy \p pred.
- *  \param pred A function object which decides to which partition each element of the
- *              sequence <tt>[first, last)</tt> belongs.
- *  \return A \p pair p such that <tt>p.first</tt> is the end of the output range beginning
- *          at \p out_true and <tt>p.second</tt> is the end of the output range beginning at
- *          \p out_false.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator's \c value_type is convertible to \p OutputIterator1 and \p OutputIterator2's \c value_types.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator2's \c value_type is convertible to \p Predicate's \c argument_type.
- *  \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *
- *  \pre The input ranges shall not overlap with either output range.
- *
- *  The following code snippet demonstrates how to use \p stable_partition_copy to
- *  reorder a sequence so that even numbers precede odd numbers using the \p thrust::host execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/partition.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  int A[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
- *  int S[] = {0, 1, 0, 1, 0, 1, 0, 1, 0,  1};
- *  int result[10];
- *  const int N = sizeof(A)/sizeof(int);
- *  int *evens = result;
- *  int *odds  = result + 5;
- *  thrust::stable_partition_copy(thrust::host, A, A + N, S, evens, odds, thrust::identity<int>());
- *  // A remains {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
- *  // S remains {0, 1, 0, 1, 0, 1, 0, 1, 0,  1}
- *  // result is now {2, 4, 6, 8, 10, 1, 3, 5, 7, 9}
- *  // evens points to {2, 4, 6, 8, 10}
- *  // odds points to {1, 3, 5, 7, 9}
- *  \endcode
- *
- *  \see http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2569.pdf
- *  \see \p partition_copy
- *  \see \p stable_partition
- */
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename Predicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    stable_partition_copy(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                          InputIterator1 first,
-                          InputIterator1 last,
-                          InputIterator2 stencil,
-                          OutputIterator1 out_true,
-                          OutputIterator2 out_false,
-                          Predicate pred);
-
-
-/*! \p stable_partition_copy differs from \ref stable_partition only in that the reordered
- *  sequence is written to different output sequences, rather than in place.
- *
- *  \p stable_partition_copy copies the elements <tt>[first, last)</tt> based on the
- *  function object \p pred which is applied to a range of stencil elements. All of the elements
- *  whose corresponding stencil element satisfies \p pred are copied to the range beginning at \p out_true
- *  and all the elements whose stencil element fails to satisfy it are copied to the range beginning
- *  at \p out_false.
- *
- *  \p stable_partition_copy differs from \ref partition_copy in that
- *  \p stable_partition_copy is guaranteed to preserve relative order. That is, if
- *  \c x and \c y are elements in <tt>[first, last)</tt>, such that
- *  <tt>pred(x) == pred(y)</tt>, and if \c x precedes \c y, then it will still be true
- *  after \p stable_partition_copy that \c x precedes \c y in the output.
- *
- *  \param first The first element of the sequence to reorder.
- *  \param last One position past the last element of the sequence to reorder.
- *  \param stencil The beginning of the stencil sequence.
- *  \param out_true The destination of the resulting sequence of elements which satisfy \p pred.
- *  \param out_false The destination of the resulting sequence of elements which fail to satisfy \p pred.
- *  \param pred A function object which decides to which partition each element of the
- *              sequence <tt>[first, last)</tt> belongs.
- *  \return A \p pair p such that <tt>p.first</tt> is the end of the output range beginning
- *          at \p out_true and <tt>p.second</tt> is the end of the output range beginning at
- *          \p out_false.
- *
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator's \c value_type is convertible to \p OutputIterator1 and \p OutputIterator2's \c value_types.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator2's \c value_type is convertible to \p Predicate's \c argument_type.
- *  \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *
- *  \pre The input ranges shall not overlap with either output range.
- *
- *  The following code snippet demonstrates how to use \p stable_partition_copy to
- *  reorder a sequence so that even numbers precede odd numbers.
- *
- *  \code
- *  #include <thrust/partition.h>
- *  #include <thrust/functional.h>
- *  ...
- *  int A[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
- *  int S[] = {0, 1, 0, 1, 0, 1, 0, 1, 0,  1};
- *  int result[10];
- *  const int N = sizeof(A)/sizeof(int);
- *  int *evens = result;
- *  int *odds  = result + 5;
- *  thrust::stable_partition_copy(A, A + N, S, evens, odds, thrust::identity<int>());
- *  // A remains {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
- *  // S remains {0, 1, 0, 1, 0, 1, 0, 1, 0,  1}
- *  // result is now {2, 4, 6, 8, 10, 1, 3, 5, 7, 9}
- *  // evens points to {2, 4, 6, 8, 10}
- *  // odds points to {1, 3, 5, 7, 9}
- *  \endcode
- *
- *  \see http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2569.pdf
- *  \see \p partition_copy
- *  \see \p stable_partition
- */
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename Predicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    stable_partition_copy(InputIterator1 first,
-                          InputIterator1 last,
-                          InputIterator2 stencil,
-                          OutputIterator1 out_true,
-                          OutputIterator2 out_false,
-                          Predicate pred);
-
-
-/*! \} // end stream_compaction
- */
-
-/*! \} // end reordering
- */
-
-/*! \addtogroup searching
- *  \{
- */
-
-
-/*! \p partition_point returns an iterator pointing to the end of the true
- *  partition of a partitioned range. \p partition_point requires the input range
- *  <tt>[first,last)</tt> to be a partition; that is, all elements which satisfy
- *  <tt>pred</tt> shall appear before those that do not.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the range to consider.
- *  \param last The end of the range to consider.
- *  \param pred A function object which decides to which partition each element of the
- *              range <tt>[first, last)</tt> belongs.
- *  \return An iterator \c mid such that <tt>all_of(first, mid, pred)</tt>
- *          and <tt>none_of(mid, last, pred)</tt> are both true.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \p ForwardIterator's \c value_type is convertible to \p Predicate's \c argument_type.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *
- *  \pre The range <tt>[first, last)</tt> shall be partitioned by \p pred.
- *
- *  \note Though similar, \p partition_point is not redundant with \p find_if_not.
- *        \p partition_point's precondition provides an opportunity for a
- *        faster implemention.
- *
- *  \code
- *  #include <thrust/partition.h>
- *  #include <thrust/execution_policy.h>
- *
- *  struct is_even
- *  {
- *    __host__ __device__
- *    bool operator()(const int &x)
- *    {
- *      return (x % 2) == 0;
- *    }
- *  };
- *  
- *  ...
- *
- *  int A[] = {2, 4, 6, 8, 10, 1, 3, 5, 7, 9};
- *  int * B = thrust::partition_point(thrust::host, A, A + 10, is_even());
- *  // B - A is 5
- *  // [A, B) contains only even values
- *  \endcode
- *
- *  \see \p partition
- *  \see \p find_if_not
- */
-template<typename DerivedPolicy, typename ForwardIterator, typename Predicate>
-  ForwardIterator partition_point(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                  ForwardIterator first,
-                                  ForwardIterator last,
-                                  Predicate pred);
-
-
-/*! \p partition_point returns an iterator pointing to the end of the true
- *  partition of a partitioned range. \p partition_point requires the input range
- *  <tt>[first,last)</tt> to be a partition; that is, all elements which satisfy
- *  <tt>pred</tt> shall appear before those that do not.
- *  \param first The beginning of the range to consider.
- *  \param last The end of the range to consider.
- *  \param pred A function object which decides to which partition each element of the
- *              range <tt>[first, last)</tt> belongs.
- *  \return An iterator \c mid such that <tt>all_of(first, mid, pred)</tt>
- *          and <tt>none_of(mid, last, pred)</tt> are both true.
- *
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \p ForwardIterator's \c value_type is convertible to \p Predicate's \c argument_type.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *
- *  \pre The range <tt>[first, last)</tt> shall be partitioned by \p pred.
- *
- *  \note Though similar, \p partition_point is not redundant with \p find_if_not.
- *        \p partition_point's precondition provides an opportunity for a
- *        faster implemention.
- *
- *  \code
- *  #include <thrust/partition.h>
- *
- *  struct is_even
- *  {
- *    __host__ __device__
- *    bool operator()(const int &x)
- *    {
- *      return (x % 2) == 0;
- *    }
- *  };
- *  
- *  ...
- *
- *  int A[] = {2, 4, 6, 8, 10, 1, 3, 5, 7, 9};
- *  int * B = thrust::partition_point(A, A + 10, is_even());
- *  // B - A is 5
- *  // [A, B) contains only even values
- *  \endcode
- *
- *  \see \p partition
- *  \see \p find_if_not
- */
-template<typename ForwardIterator, typename Predicate>
-  ForwardIterator partition_point(ForwardIterator first,
-                                  ForwardIterator last,
-                                  Predicate pred);
-
-/*! \} // searching
- */
-
-/*! \addtogroup reductions
- *  \{
- *  \addtogroup predicates
- *  \{
- */
-
-
-/*! \p is_partitioned returns \c true if the given range 
- *  is partitioned with respect to a predicate, and \c false otherwise.
- *
- *  Specifically, \p is_partitioned returns \c true if <tt>[first, last)</tt>
- *  is empty of if <tt>[first, last)</tt> is partitioned by \p pred, i.e. if
- *  all elements that satisfy \p pred appear before those that do not.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the range to consider.
- *  \param last The end of the range to consider.
- *  \param pred A function object which decides to which partition each element of the
- *         range <tt>[first, last)</tt> belongs.
- *  \return \c true if the range <tt>[first, last)</tt> is partitioned with respect
- *          to \p pred, or if <tt>[first, last)</tt> is empty. \c false, otherwise.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Input Iterator</a>,
- *          and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *  
- *  \code
- *  #include <thrust/partition.h>
- *  #include <thrust/execution_policy.h>
- *
- *  struct is_even
- *  {
- *    __host__ __device__
- *    bool operator()(const int &x)
- *    {
- *      return (x % 2) == 0;
- *    }
- *  };
- *  
- *  ...
- *
- *  int A[] = {2, 4, 6, 8, 10, 1, 3, 5, 7, 9};
- *  int B[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
- *
- *  thrust::is_partitioned(thrust::host, A, A + 10); // returns true
- *  thrust::is_partitioned(thrust::host, B, B + 10); // returns false
- *  \endcode
- *
- *  \see \p partition
- */
-template<typename DerivedPolicy, typename InputIterator, typename Predicate>
-  bool is_partitioned(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                      InputIterator first,
-                      InputIterator last,
-                      Predicate pred);
-
-
-/*! \p is_partitioned returns \c true if the given range 
- *  is partitioned with respect to a predicate, and \c false otherwise.
- *
- *  Specifically, \p is_partitioned returns \c true if <tt>[first, last)</tt>
- *  is empty of if <tt>[first, last)</tt> is partitioned by \p pred, i.e. if
- *  all elements that satisfy \p pred appear before those that do not.
- *
- *  \param first The beginning of the range to consider.
- *  \param last The end of the range to consider.
- *  \param pred A function object which decides to which partition each element of the
- *         range <tt>[first, last)</tt> belongs.
- *  \return \c true if the range <tt>[first, last)</tt> is partitioned with respect
- *          to \p pred, or if <tt>[first, last)</tt> is empty. \c false, otherwise.
- *
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Input Iterator</a>,
- *          and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *  
- *  \code
- *  #include <thrust/partition.h>
- *
- *  struct is_even
- *  {
- *    __host__ __device__
- *    bool operator()(const int &x)
- *    {
- *      return (x % 2) == 0;
- *    }
- *  };
- *  
- *  ...
- *
- *  int A[] = {2, 4, 6, 8, 10, 1, 3, 5, 7, 9};
- *  int B[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
- *
- *  thrust::is_partitioned(A, A + 10); // returns true
- *  thrust::is_partitioned(B, B + 10); // returns false
- *  \endcode
- *
- *  \see \p partition
- */
-template<typename InputIterator, typename Predicate>
-  bool is_partitioned(InputIterator first,
-                      InputIterator last,
-                      Predicate pred);
-
-
-/*! \} // end predicates
- *  \} // end reductions
- */
-
-
-} // end thrust
-
-#include <thrust/detail/partition.inl>
-
diff --git a/compat/thrust/random.h b/compat/thrust/random.h
deleted file mode 100644
index 5a2c00da86..0000000000
--- a/compat/thrust/random.h
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-/*! \file random.h
- *  \brief Pseudo-random number generators.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/cstdint.h>
-
-// RNGs
-#include <thrust/random/discard_block_engine.h>
-#include <thrust/random/linear_congruential_engine.h>
-#include <thrust/random/linear_feedback_shift_engine.h>
-#include <thrust/random/subtract_with_carry_engine.h>
-#include <thrust/random/xor_combine_engine.h>
-
-// distributions
-#include <thrust/random/uniform_int_distribution.h>
-#include <thrust/random/uniform_real_distribution.h>
-#include <thrust/random/normal_distribution.h>
-
-namespace thrust
-{
-
-
-/*! \addtogroup random Random Number Generation
- *  \{
- */
-
-
-/*! \namespace thrust::random
- *  \brief \p thrust::random is the namespace which contains random number engine class templates,
- *  random number engine adaptor class templates, engines with predefined parameters,
- *  and random number distribution class templates. They are provided in a separate namespace
- *  for import convenience but are also aliased in the top-level \p thrust namespace for
- *  easy access.
- */
-namespace random
-{
-
-/*! \addtogroup predefined_random Random Number Engines with Predefined Parameters
- *  \ingroup random
- *  \{
- */
-
-/*! \typedef ranlux24
- *  \brief A random number engine with predefined parameters which implements the
- *         RANLUX level-3 random number generation algorithm.
- *  \note The 10000th consecutive invocation of a default-constructed object of type \p ranlux24
- *        shall produce the value \c 9901578 .
- */
-typedef discard_block_engine<ranlux24_base, 223, 23> ranlux24;
-
-
-/*! \typedef ranlux48
- *  \brief A random number engine with predefined parameters which implements the
- *         RANLUX level-4 random number generation algorithm.
- *  \note The 10000th consecutive invocation of a default-constructed object of type \p ranlux48
- *        shall produce the value \c 88229545517833 .
- */
-typedef discard_block_engine<ranlux48_base, 389, 11> ranlux48;
-
-
-/*! \typedef taus88
- *  \brief A random number engine with predefined parameters which implements
- *         L'Ecuyer's 1996 three-component Tausworthe random number generator.
- *
- *  \note The 10000th consecutive invocation of a default-constructed object of type \p taus88
- *        shall produce the value \c 3535848941 .
- */
-typedef xor_combine_engine<
-  linear_feedback_shift_engine<thrust::detail::uint32_t, 32u, 31u, 13u, 12u>,
-  0,
-  xor_combine_engine<
-    linear_feedback_shift_engine<thrust::detail::uint32_t, 32u, 29u,  2u,  4u>, 0,
-    linear_feedback_shift_engine<thrust::detail::uint32_t, 32u, 28u,  3u, 17u>, 0
-  >,
-  0
-> taus88;
-
-/*! \typedef default_random_engine
- *  \brief An implementation-defined "default" random number engine.
- *  \note \p default_random_engine is currently an alias for \p minstd_rand, and may change
- *        in a future version.
- */
-typedef minstd_rand default_random_engine;
-
-/*! \} // end predefined_random
- */
-
-} // end random
-
-
-/*! \} // end random
- */
-
-// import names into thrust::
-using random::ranlux24;
-using random::ranlux48;
-using random::taus88;
-using random::default_random_engine;
-
-} // end thrust
-
diff --git a/compat/thrust/random/detail/discard_block_engine.inl b/compat/thrust/random/detail/discard_block_engine.inl
deleted file mode 100644
index 5f01bd1816..0000000000
--- a/compat/thrust/random/detail/discard_block_engine.inl
+++ /dev/null
@@ -1,201 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/random/discard_block_engine.h>
-
-namespace thrust
-{
-
-namespace random
-{
-
-
-template<typename Engine, size_t p, size_t r>
-  discard_block_engine<Engine,p,r>
-    ::discard_block_engine()
-      : m_e(), m_n(0)
-{}
-
-
-template<typename Engine, size_t p, size_t r>
-  discard_block_engine<Engine,p,r>
-    ::discard_block_engine(result_type s)
-      : m_e(s), m_n(0)
-{}
-
-
-template<typename Engine, size_t p, size_t r>
-  discard_block_engine<Engine,p,r>
-    ::discard_block_engine(const base_type &urng)
-      : m_e(urng), m_n(0)
-{}
-
-
-template<typename Engine, size_t p, size_t r>
-  void discard_block_engine<Engine,p,r>
-    ::seed(void)
-{
-  m_e.seed();
-  m_n = 0;
-}
-
-
-template<typename Engine, size_t p, size_t r>
-  void discard_block_engine<Engine,p,r>
-    ::seed(result_type s)
-{
-  m_e.seed(s);
-  m_n = 0;
-}
-
-
-template<typename Engine, size_t p, size_t r>
-  typename discard_block_engine<Engine,p,r>::result_type
-    discard_block_engine<Engine,p,r>
-      ::operator()(void)
-{
-  if(m_n >= used_block)
-  {
-    m_e.discard(block_size - m_n);
-//    for(; m_n < block_size; ++m_n)
-//      m_e();
-    m_n = 0;
-  }
-
-  ++m_n;
-
-  return m_e();
-}
-
-
-template<typename Engine, size_t p, size_t r>
-  void discard_block_engine<Engine,p,r>
-    ::discard(unsigned long long z)
-{
-  // XXX this should be accelerated
-  for(; z > 0; --z)
-  {
-    this->operator()();
-  } // end for
-}
-
-
-template<typename Engine, size_t p, size_t r>
-  const typename discard_block_engine<Engine,p,r>::base_type &
-    discard_block_engine<Engine,p,r>
-      ::base(void) const
-{
-  return m_e;
-}
-
-
-template<typename Engine, size_t p, size_t r>
-  template<typename CharT, typename Traits>
-    std::basic_ostream<CharT,Traits>& discard_block_engine<Engine,p,r>
-      ::stream_out(std::basic_ostream<CharT,Traits> &os) const
-{
-  typedef std::basic_ostream<CharT,Traits> ostream_type;
-  typedef typename ostream_type::ios_base  ios_base;
-
-  // save old flags & fill character
-  const typename ios_base::fmtflags flags = os.flags();
-  const CharT fill = os.fill();
-
-  const CharT space = os.widen(' ');
-  os.flags(ios_base::dec | ios_base::fixed | ios_base::left);
-  os.fill(space);
-
-  // output the base engine followed by n
-  os << m_e << space << m_n;
-
-  // restore flags & fill character
-  os.flags(flags);
-  os.fill(fill);
-
-  return os;
-}
-
-
-template<typename Engine, size_t p, size_t r>
-  template<typename CharT, typename Traits>
-    std::basic_istream<CharT,Traits>& discard_block_engine<Engine,p,r>
-      ::stream_in(std::basic_istream<CharT,Traits> &is)
-{
-  typedef std::basic_istream<CharT,Traits> istream_type;
-  typedef typename istream_type::ios_base  ios_base;
-
-  // save old flags
-  const typename ios_base::fmtflags flags = is.flags();
-
-  is.flags(ios_base::skipws);
-
-  // input the base engine and then n
-  is >> m_e >> m_n;
-
-  // restore old flags
-  is.flags(flags);
-  return is;
-}
-
-
-template<typename Engine, size_t p, size_t r>
-  bool discard_block_engine<Engine,p,r>
-    ::equal(const discard_block_engine<Engine,p,r> &rhs) const
-{
-  return (m_e == rhs.m_e) && (m_n == rhs.m_n);
-}
-
-
-template<typename Engine, size_t p, size_t r,
-         typename CharT, typename Traits>
-std::basic_ostream<CharT,Traits>&
-operator<<(std::basic_ostream<CharT,Traits> &os,
-           const discard_block_engine<Engine,p,r> &e)
-{
-  return thrust::random::detail::random_core_access::stream_out(os,e);
-}
-
-
-template<typename Engine, size_t p, size_t r,
-         typename CharT, typename Traits>
-std::basic_istream<CharT,Traits>&
-operator>>(std::basic_istream<CharT,Traits> &is,
-           discard_block_engine<Engine,p,r> &e)
-{
-  return thrust::random::detail::random_core_access::stream_in(is,e);
-}
-
-
-template<typename Engine, size_t p, size_t r>
-bool operator==(const discard_block_engine<Engine,p,r> &lhs,
-                const discard_block_engine<Engine,p,r> &rhs)
-{
-  return thrust::random::detail::random_core_access::equal(lhs,rhs);
-}
-
-
-template<typename Engine, size_t p, size_t r>
-bool operator!=(const discard_block_engine<Engine,p,r> &lhs,
-                const discard_block_engine<Engine,p,r> &rhs)
-{
-  return !(lhs == rhs);
-}
-
-
-} // end random
-
-} // end thrust
-
diff --git a/compat/thrust/random/detail/linear_congruential_engine.inl b/compat/thrust/random/detail/linear_congruential_engine.inl
deleted file mode 100644
index f040563cc2..0000000000
--- a/compat/thrust/random/detail/linear_congruential_engine.inl
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/random/linear_congruential_engine.h>
-#include <thrust/random/detail/mod.h>
-#include <thrust/random/detail/random_core_access.h>
-
-namespace thrust
-{
-
-namespace random
-{
-
-
-template<typename UIntType, UIntType a, UIntType c, UIntType m>
-  linear_congruential_engine<UIntType,a,c,m>
-    ::linear_congruential_engine(result_type s)
-{
-  seed(s);
-} // end linear_congruential_engine::linear_congruential_engine()
-
-
-template<typename UIntType, UIntType a, UIntType c, UIntType m>
-  void linear_congruential_engine<UIntType,a,c,m>
-    ::seed(result_type s)
-{
-  if((detail::mod<UIntType, 1, 0, m>(c) == 0) &&
-     (detail::mod<UIntType, 1, 0, m>(s) == 0))
-    m_x = detail::mod<UIntType, 1, 0, m>(1);
-  else
-    m_x = detail::mod<UIntType, 1, 0, m>(s);
-} // end linear_congruential_engine::seed()
-
-
-template<typename UIntType, UIntType a, UIntType c, UIntType m>
-  typename linear_congruential_engine<UIntType,a,c,m>::result_type
-    linear_congruential_engine<UIntType,a,c,m>
-      ::operator()(void)
-{
-  m_x = detail::mod<UIntType,a,c,m>(m_x);
-  return m_x;
-} // end linear_congruential_engine::operator()()
-
-
-template<typename UIntType, UIntType a, UIntType c, UIntType m>
-  void linear_congruential_engine<UIntType,a,c,m>
-    ::discard(unsigned long long z)
-{
-  thrust::random::detail::linear_congruential_engine_discard::discard(*this,z);
-} // end linear_congruential_engine::discard()
-
-
-template<typename UIntType, UIntType a, UIntType c, UIntType m>
-  template<typename CharT, typename Traits>
-    std::basic_ostream<CharT,Traits>& linear_congruential_engine<UIntType,a,c,m>
-      ::stream_out(std::basic_ostream<CharT,Traits> &os) const
-{
-  typedef std::basic_ostream<CharT,Traits> ostream_type;
-  typedef typename ostream_type::ios_base  ios_base;
-
-  // save old flags & fill character
-  const typename ios_base::fmtflags flags = os.flags();
-  const CharT fill = os.fill();
-
-  os.flags(ios_base::dec | ios_base::fixed | ios_base::left);
-  os.fill(os.widen(' '));
-
-  // output one word of state
-  os << m_x;
-
-  // restore flags & fill character
-  os.flags(flags);
-  os.fill(fill);
-
-  return os;
-}
-
-
-template<typename UIntType, UIntType a, UIntType c, UIntType m>
-  template<typename CharT, typename Traits>
-    std::basic_istream<CharT,Traits>& linear_congruential_engine<UIntType,a,c,m>
-      ::stream_in(std::basic_istream<CharT,Traits> &is)
-{
-  typedef std::basic_istream<CharT,Traits> istream_type;
-  typedef typename istream_type::ios_base     ios_base;
-
-  // save old flags
-  const typename ios_base::fmtflags flags = is.flags();
-
-  is.flags(ios_base::dec);
-
-  // input one word of state
-  is >> m_x;
-
-  // restore flags
-  is.flags(flags);
-
-  return is;
-}
-
-
-template<typename UIntType, UIntType a, UIntType c, UIntType m>
-bool linear_congruential_engine<UIntType,a,c,m>
-  ::equal(const linear_congruential_engine<UIntType,a,c,m> &rhs) const
-{
-  return m_x == rhs.m_x;
-}
-
-
-template<typename UIntType_, UIntType_ a_, UIntType_ c_, UIntType_ m_>
-__host__ __device__
-bool operator==(const linear_congruential_engine<UIntType_,a_,c_,m_> &lhs,
-                const linear_congruential_engine<UIntType_,a_,c_,m_> &rhs)
-{
-  return detail::random_core_access::equal(lhs,rhs);
-}
-
-
-template<typename UIntType, UIntType a, UIntType c, UIntType m>
-bool operator!=(const linear_congruential_engine<UIntType,a,c,m> &lhs,
-                const linear_congruential_engine<UIntType,a,c,m> &rhs)
-{
-  return !(lhs == rhs);
-}
-
-
-template<typename UIntType_, UIntType_ a_, UIntType_ c_, UIntType_ m_,
-         typename CharT, typename Traits>
-std::basic_ostream<CharT,Traits>&
-operator<<(std::basic_ostream<CharT,Traits> &os,
-           const linear_congruential_engine<UIntType_,a_,c_,m_> &e)
-{
-  return detail::random_core_access::stream_out(os,e);
-}
-
-
-template<typename UIntType_, UIntType_ a_, UIntType_ c_, UIntType_ m_,
-         typename CharT, typename Traits>
-std::basic_istream<CharT,Traits>&
-operator>>(std::basic_istream<CharT,Traits> &is,
-           linear_congruential_engine<UIntType_,a_,c_,m_> &e)
-{
-  return detail::random_core_access::stream_in(is,e);
-}
-
-
-} // end random
-
-} // end thrust
-
diff --git a/compat/thrust/random/detail/linear_congruential_engine_discard.h b/compat/thrust/random/detail/linear_congruential_engine_discard.h
deleted file mode 100644
index f4ec23364e..0000000000
--- a/compat/thrust/random/detail/linear_congruential_engine_discard.h
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/cstdint.h>
-#include <thrust/random/detail/mod.h>
-
-namespace thrust
-{
-
-namespace random
-{
-
-namespace detail
-{
-
-
-template<typename UIntType, UIntType a, unsigned long long c, UIntType m>
-  struct linear_congruential_engine_discard_implementation
-{
-  __host__ __device__
-  static void discard(UIntType &state, unsigned long long z)
-  {
-    for(; z > 0; --z)
-    {
-      state = detail::mod<UIntType,a,c,m>(state);
-    }
-  }
-}; // end linear_congruential_engine_discard
-
-
-// specialize for small integers and c == 0
-// XXX figure out a robust implemenation of this for any unsigned integer type later
-template<thrust::detail::uint32_t a, thrust::detail::uint32_t m>
-  struct linear_congruential_engine_discard_implementation<thrust::detail::uint32_t,a,0,m>
-{
-  __host__ __device__
-  static void discard(thrust::detail::uint32_t &state, unsigned long long z)
-  {
-    const thrust::detail::uint32_t modulus = m;
-
-    // XXX we need to use unsigned long long here or we will encounter overflow in the
-    //     multiplies below
-    //     figure out a robust implementation of this later
-    unsigned long long multiplier = a;
-    unsigned long long multiplier_to_z = 1;
-    
-    // see http://en.wikipedia.org/wiki/Modular_exponentiation
-    while(z > 0)
-    {
-      if(z & 1)
-      {
-        // multiply in this bit's contribution while using modulus to keep result small
-        multiplier_to_z = (multiplier_to_z * multiplier) % modulus;
-      }
-
-      // move to the next bit of the exponent, square (and mod) the base accordingly
-      z >>= 1;
-      multiplier = (multiplier * multiplier) % modulus;
-    }
-
-    state = static_cast<thrust::detail::uint32_t>((multiplier_to_z * state) % modulus);
-  }
-}; // end linear_congruential_engine_discard
-
-
-struct linear_congruential_engine_discard
-{
-  template<typename LinearCongruentialEngine>
-  __host__ __device__
-  static void discard(LinearCongruentialEngine &lcg, unsigned long long z)
-  {
-    typedef typename LinearCongruentialEngine::result_type result_type;
-    const result_type c = LinearCongruentialEngine::increment;
-    const result_type a = LinearCongruentialEngine::multiplier;
-    const result_type m = LinearCongruentialEngine::modulus;
-    
-    // XXX WAR unused variable warnings
-    (void) c;
-    (void) a;
-    (void) m;
-
-    linear_congruential_engine_discard_implementation<result_type,a,c,m>::discard(lcg.m_x, z);
-  }
-}; // end linear_congruential_engine_discard
-
-
-} // end detail
-
-} // end random
-
-} // end thrust
-
diff --git a/compat/thrust/random/detail/linear_feedback_shift_engine.inl b/compat/thrust/random/detail/linear_feedback_shift_engine.inl
deleted file mode 100644
index 4e8dad5140..0000000000
--- a/compat/thrust/random/detail/linear_feedback_shift_engine.inl
+++ /dev/null
@@ -1,158 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/random/linear_feedback_shift_engine.h>
-
-namespace thrust
-{
-
-namespace random
-{
-
-template<typename UIntType, size_t w, size_t k, size_t q, size_t s>
-  linear_feedback_shift_engine<UIntType,w,k,q,s>
-    ::linear_feedback_shift_engine(result_type value)
-{
-  seed(value);
-} // end linear_feedback_shift_engine::linear_feedback_shift_engine()
-
-template<typename UIntType, size_t w, size_t k, size_t q, size_t s>
-  void linear_feedback_shift_engine<UIntType,w,k,q,s>
-    ::seed(result_type value)
-{
-  m_value = value;
-} // end linear_feedback_shift_engine::seed()
-
-template<typename UIntType, size_t w, size_t k, size_t q, size_t s>
-  typename linear_feedback_shift_engine<UIntType,w,k,q,s>::result_type
-    linear_feedback_shift_engine<UIntType,w,k,q,s>
-      ::operator()(void)
-{
-  const UIntType b = (((m_value << q) ^ m_value) & wordmask) >> (k-s);
-  const UIntType mask = ( (~static_cast<UIntType>(0)) << (w-k) ) & wordmask;
-  m_value = ((m_value & mask) << s) ^ b;
-  return m_value;
-} // end linear_feedback_shift_engine::operator()()
-
-
-template<typename UIntType, size_t w, size_t k, size_t q, size_t s>
-  void linear_feedback_shift_engine<UIntType,w,k,q,s>
-    ::discard(unsigned long long z)
-{
-  for(; z > 0; --z)
-  {
-    this->operator()();
-  } // end for
-} // end linear_feedback_shift_engine::discard()
-
-
-template<typename UIntType, size_t w, size_t k, size_t q, size_t s>
-  template<typename CharT, typename Traits>
-    std::basic_ostream<CharT,Traits>& linear_feedback_shift_engine<UIntType,w,k,q,s>
-      ::stream_out(std::basic_ostream<CharT,Traits> &os) const
-{
-  typedef std::basic_ostream<CharT,Traits> ostream_type;
-  typedef typename ostream_type::ios_base  ios_base;
-
-  // save old flags & fill character
-  const typename ios_base::fmtflags flags = os.flags();
-  const CharT fill = os.fill();
-
-  os.flags(ios_base::dec | ios_base::fixed | ios_base::left);
-  os.fill(os.widen(' '));
-
-  // output one word of state
-  os << m_value;
-
-  // restore flags & fill character
-  os.flags(flags);
-  os.fill(fill);
-
-  return os;
-}
-
-
-template<typename UIntType, size_t w, size_t k, size_t q, size_t s>
-  template<typename CharT, typename Traits>
-    std::basic_istream<CharT,Traits>& linear_feedback_shift_engine<UIntType,w,k,q,s>
-      ::stream_in(std::basic_istream<CharT,Traits> &is)
-{
-  typedef std::basic_istream<CharT,Traits> istream_type;
-  typedef typename istream_type::ios_base     ios_base;
-
-  // save old flags
-  const typename ios_base::fmtflags flags = is.flags();
-
-  is.flags(ios_base::skipws);
-
-  // input one word of state
-  is >> m_value;
-
-  // restore flags
-  is.flags(flags);
-
-  return is;
-}
-
-
-template<typename UIntType, size_t w, size_t k, size_t q, size_t s>
-  bool linear_feedback_shift_engine<UIntType,w,k,q,s>
-    ::equal(const linear_feedback_shift_engine<UIntType,w,k,q,s> &rhs) const
-{
-  return m_value == rhs.m_value;
-}
-
-
-template<typename UIntType, size_t w, size_t k, size_t q, size_t s>
-bool operator==(const linear_feedback_shift_engine<UIntType,w,k,q,s> &lhs,
-                const linear_feedback_shift_engine<UIntType,w,k,q,s> &rhs)
-{
-  return thrust::random::detail::random_core_access::equal(lhs,rhs);
-}
-
-
-template<typename UIntType, size_t w, size_t k, size_t q, size_t s>
-bool operator!=(const linear_feedback_shift_engine<UIntType,w,k,q,s> &lhs,
-                const linear_feedback_shift_engine<UIntType,w,k,q,s> &rhs)
-{
-  return !(lhs == rhs);
-}
-
-
-template<typename UIntType_, size_t w_, size_t k_, size_t q_, size_t s_,
-         typename CharT, typename Traits>
-std::basic_ostream<CharT,Traits>&
-operator<<(std::basic_ostream<CharT,Traits> &os,
-           const linear_feedback_shift_engine<UIntType_,w_,k_,q_,s_> &e)
-{
-  return thrust::random::detail::random_core_access::stream_out(os,e);
-}
-
-
-template<typename UIntType_, size_t w_, size_t k_, size_t q_, size_t s_,
-         typename CharT, typename Traits>
-std::basic_istream<CharT,Traits>&
-operator>>(std::basic_istream<CharT,Traits> &is,
-           linear_feedback_shift_engine<UIntType_,w_,k_,q_,s_> &e)
-{
-  return thrust::random::detail::random_core_access::stream_in(is,e);
-}
-
-
-} // end random
-
-} // end thrust
-
diff --git a/compat/thrust/random/detail/linear_feedback_shift_engine_wordmask.h b/compat/thrust/random/detail/linear_feedback_shift_engine_wordmask.h
deleted file mode 100644
index ed9e51e925..0000000000
--- a/compat/thrust/random/detail/linear_feedback_shift_engine_wordmask.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-namespace thrust
-{
-
-namespace random
-{
-
-namespace detail
-{
-
-template<typename T, int w, int i = w-1>
-  struct linear_feedback_shift_engine_wordmask
-{
-  static const T value =
-    (T(1u) << i) |
-    linear_feedback_shift_engine_wordmask<T, w, i-1>::value;
-}; // end linear_feedback_shift_engine_wordmask
-
-template<typename T, int w>
-  struct linear_feedback_shift_engine_wordmask<T, w, 0>
-{
-  static const T value = 0;
-}; // end linear_feedback_shift_engine_wordmask
-
-} // end detail
-
-} // end random
-
-} // end thrust
-
diff --git a/compat/thrust/random/detail/mod.h b/compat/thrust/random/detail/mod.h
deleted file mode 100644
index ceb2191552..0000000000
--- a/compat/thrust/random/detail/mod.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-namespace thrust
-{
-
-namespace random
-{
-
-namespace detail
-{
-
-template<typename T, T a, T c, T m, bool = (m == 0)>
-  struct static_mod
-{
-  static const T q = m / a;
-  static const T r = m % a;
-
-  __host__ __device__
-  T operator()(T x) const
-  {
-    if(a == 1)
-    {
-      x %= m;
-    }
-    else
-    {
-      T t1 = a * (x % q);
-      T t2 = r * (x / q);
-      if(t1 >= t2)
-      {
-        x = t1 - t2;
-      }
-      else
-      {
-        x = m - t2 + t1;
-      }
-    }
-
-    if(c != 0)
-    {
-      const T d = m - x;
-      if(d > c)
-      {
-        x += c;
-      }
-      else
-      {
-        x = c - d;
-      }
-    }
-
-    return x;
-  }
-}; // end static_mod
-
-
-// Rely on machine overflow handling
-template<typename T, T a, T c, T m>
-  struct static_mod<T,a,c,m,true>
-{
-  __host__ __device__
-  T operator()(T x) const
-  {
-    return a * x + c;
-  }
-}; // end static_mod
-
-template<typename T, T a, T c, T m>
-__host__ __device__
-  T mod(T x)
-{
-  static_mod<T,a,c,m> f;
-  return f(x);
-} // end static_mod
-
-} // end detail
-
-} // end random
-
-} // end thrust
-
diff --git a/compat/thrust/random/detail/normal_distribution.inl b/compat/thrust/random/detail/normal_distribution.inl
deleted file mode 100644
index 1bb55d75b2..0000000000
--- a/compat/thrust/random/detail/normal_distribution.inl
+++ /dev/null
@@ -1,241 +0,0 @@
-/*
- *
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/random/normal_distribution.h>
-#include <thrust/random/uniform_real_distribution.h>
-#include <thrust/detail/cstdint.h>
-#include <thrust/detail/integer_traits.h>
-
-// for floating point infinity
-#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC
-#include <math_constants.h>
-#else
-#include <limits>
-#endif
-
-namespace thrust
-{
-
-namespace random
-{
-
-
-template<typename RealType>
-  normal_distribution<RealType>
-    ::normal_distribution(RealType a, RealType b)
-      :super_t(),m_param(a,b)
-{
-} // end normal_distribution::normal_distribution()
-
-
-template<typename RealType>
-  normal_distribution<RealType>
-    ::normal_distribution(const param_type &parm)
-      :super_t(),m_param(parm)
-{
-} // end normal_distribution::normal_distribution()
-
-
-template<typename RealType>
-  void normal_distribution<RealType>
-    ::reset(void)
-{
-  super_t::reset();
-} // end normal_distribution::reset()
-
-
-template<typename RealType>
-  template<typename UniformRandomNumberGenerator>
-    typename normal_distribution<RealType>::result_type
-      normal_distribution<RealType>
-        ::operator()(UniformRandomNumberGenerator &urng)
-{
-  return operator()(urng, m_param);
-} // end normal_distribution::operator()()
-
-
-template<typename RealType>
-  template<typename UniformRandomNumberGenerator>
-    typename normal_distribution<RealType>::result_type
-      normal_distribution<RealType>
-        ::operator()(UniformRandomNumberGenerator &urng,
-                     const param_type &parm)
-{
-  return super_t::sample(urng, parm.first, parm.second);
-} // end normal_distribution::operator()()
-
-
-template<typename RealType>
-  typename normal_distribution<RealType>::param_type
-    normal_distribution<RealType>
-      ::param(void) const
-{
-  return m_param;
-} // end normal_distribution::param()
-
-
-template<typename RealType>
-  void normal_distribution<RealType>
-    ::param(const param_type &parm)
-{
-  m_param = parm;
-} // end normal_distribution::param()
-
-
-template<typename RealType>
-  typename normal_distribution<RealType>::result_type
-    normal_distribution<RealType>
-      ::min THRUST_PREVENT_MACRO_SUBSTITUTION (void) const
-{
-  return -this->max();
-} // end normal_distribution::min()
-
-
-template<typename RealType>
-  typename normal_distribution<RealType>::result_type
-    normal_distribution<RealType>
-      ::max THRUST_PREVENT_MACRO_SUBSTITUTION (void) const
-{
-  // XXX this solution is pretty terrible
-  // we can't use numeric_traits<RealType>::max because nvcc will
-  // complain that it is a __host__ function
-  union
-  {
-    thrust::detail::uint32_t inf_as_int;
-    float result;
-  } hack;
-
-  hack.inf_as_int = 0x7f800000u;
-
-  return hack.result;
-} // end normal_distribution::max()
-
-
-template<typename RealType>
-  typename normal_distribution<RealType>::result_type
-    normal_distribution<RealType>
-      ::mean(void) const
-{
-  return m_param.first;
-} // end normal_distribution::mean()
-
-
-template<typename RealType>
-  typename normal_distribution<RealType>::result_type
-    normal_distribution<RealType>
-      ::stddev(void) const
-{
-  return m_param.second;
-} // end normal_distribution::stddev()
-
-
-template<typename RealType>
-  bool normal_distribution<RealType>
-    ::equal(const normal_distribution &rhs) const
-{
-  return m_param == rhs.param();
-}
-
-
-template<typename RealType>
-  template<typename CharT, typename Traits>
-    std::basic_ostream<CharT,Traits>&
-      normal_distribution<RealType>
-        ::stream_out(std::basic_ostream<CharT,Traits> &os) const
-{
-  typedef std::basic_ostream<CharT,Traits> ostream_type;
-  typedef typename ostream_type::ios_base  ios_base;
-
-  // save old flags and fill character
-  const typename ios_base::fmtflags flags = os.flags();
-  const CharT fill = os.fill();
-
-  const CharT space = os.widen(' ');
-  os.flags(ios_base::dec | ios_base::fixed | ios_base::left);
-  os.fill(space);
-
-  os << mean() << space << stddev();
-
-  // restore old flags and fill character
-  os.flags(flags);
-  os.fill(fill);
-  return os;
-}
-
-
-template<typename RealType>
-  template<typename CharT, typename Traits>
-    std::basic_istream<CharT,Traits>&
-      normal_distribution<RealType>
-        ::stream_in(std::basic_istream<CharT,Traits> &is)
-{
-  typedef std::basic_istream<CharT,Traits> istream_type;
-  typedef typename istream_type::ios_base  ios_base;
-
-  // save old flags
-  const typename ios_base::fmtflags flags = is.flags();
-
-  is.flags(ios_base::skipws);
-
-  is >> m_param.first >> m_param.second;
-
-  // restore old flags
-  is.flags(flags);
-  return is;
-}
-
-
-template<typename RealType>
-bool operator==(const normal_distribution<RealType> &lhs,
-                const normal_distribution<RealType> &rhs)
-{
-  return thrust::random::detail::random_core_access::equal(lhs,rhs);
-}
-
-
-template<typename RealType>
-bool operator!=(const normal_distribution<RealType> &lhs,
-                const normal_distribution<RealType> &rhs)
-{
-  return !(lhs == rhs);
-}
-
-
-template<typename RealType,
-         typename CharT, typename Traits>
-std::basic_ostream<CharT,Traits>&
-operator<<(std::basic_ostream<CharT,Traits> &os,
-           const normal_distribution<RealType> &d)
-{
-  return thrust::random::detail::random_core_access::stream_out(os,d);
-}
-
-
-template<typename RealType,
-         typename CharT, typename Traits>
-std::basic_istream<CharT,Traits>&
-operator>>(std::basic_istream<CharT,Traits> &is,
-           normal_distribution<RealType> &d)
-{
-  return thrust::random::detail::random_core_access::stream_in(is,d);
-}
-
-
-} // end random
-
-} // end thrust
-
diff --git a/compat/thrust/random/detail/normal_distribution_base.h b/compat/thrust/random/detail/normal_distribution_base.h
deleted file mode 100644
index d9166112ad..0000000000
--- a/compat/thrust/random/detail/normal_distribution_base.h
+++ /dev/null
@@ -1,149 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-/*
- * Copyright Jens Maurer 2000-2001
- * Distributed under the Boost Software License, Version 1.0. (See
- * accompanying file LICENSE_1_0.txt or copy at
- * http://www.boost.org/LICENSE_1_0.txt)
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/pair.h>
-#include <thrust/random/uniform_real_distribution.h>
-#include <limits>
-#include <cmath>
-
-namespace thrust
-{
-namespace random
-{
-namespace detail
-{
-
-// this version samples the normal distribution directly 
-// and uses the non-standard math function erfcinv
-template<typename RealType>
-  class normal_distribution_nvcc
-{
-  protected:
-    template<typename UniformRandomNumberGenerator>
-    __host__ __device__
-    RealType sample(UniformRandomNumberGenerator &urng, const RealType mean, const RealType stddev)
-    {
-      typedef typename UniformRandomNumberGenerator::result_type uint_type;
-      const uint_type urng_range = UniformRandomNumberGenerator::max - UniformRandomNumberGenerator::min;
-
-      // Constants for conversion
-      const RealType S1 = static_cast<RealType>(1) / urng_range;
-      const RealType S2 = S1 / 2;
-
-      RealType S3 = static_cast<RealType>(-1.4142135623730950488016887242097); // -sqrt(2)
-      
-      // Get the integer value
-      uint_type u = urng() - UniformRandomNumberGenerator::min;
-
-      // Ensure the conversion to float will give a value in the range [0,0.5)
-      if(u > (urng_range / 2))
-      {
-        u = urng_range - u;
-        S3 = -S3;
-      }
-
-      // Convert to floating point in [0,0.5)
-      RealType p = u*S1 + S2;
-
-      // Apply inverse error function
-      return mean + stddev * S3 * erfcinv(2 * p);
-    }
-
-    // no-op
-    __host__ __device__
-    void reset() {}
-};
-
-// this version samples the normal distribution using 
-// Marsaglia's "polar method"
-template<typename RealType>
-  class normal_distribution_portable
-{
-  protected:
-    normal_distribution_portable()
-      : m_valid(false)
-    {}
-
-    normal_distribution_portable(const normal_distribution_portable &other)
-      : m_valid(other.m_valid)
-    {}
-
-    void reset()
-    {
-      m_valid = false;
-    }
-
-    // note that we promise to call this member function with the same mean and stddev
-    template<typename UniformRandomNumberGenerator>
-    __host__ __device__
-    RealType sample(UniformRandomNumberGenerator &urng, const RealType mean, const RealType stddev)
-    {
-      // implementation from Boost
-      // allow for Koenig lookup
-      using std::sqrt; using std::log; using std::sin; using std::cos;
-
-      if(!m_valid)
-      {
-        uniform_real_distribution<RealType> u01;
-        m_r1 = u01(urng);
-        m_r2 = u01(urng);
-        m_cached_rho = sqrt(-RealType(2) * log(RealType(1)-m_r2));
-
-        m_valid = true;
-      }
-      else
-      {
-        m_valid = false;
-      }
-
-      const RealType pi = RealType(3.14159265358979323846);
-
-      RealType result = m_cached_rho * (m_valid ?
-                          cos(RealType(2)*pi*m_r1) :
-                          sin(RealType(2)*pi*m_r1));
-
-      return result;
-    }
-
-  private:
-    RealType m_r1, m_r2, m_cached_rho;
-    bool m_valid;
-};
-
-template<typename RealType>
-  struct normal_distribution_base
-{
-#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC
-  typedef normal_distribution_nvcc<RealType> type;
-#else
-  typedef normal_distribution_portable<RealType> type;
-#endif
-};
-
-} // end detail
-} // end random
-} // end thrust
-
diff --git a/compat/thrust/random/detail/random_core_access.h b/compat/thrust/random/detail/random_core_access.h
deleted file mode 100644
index 81f58e2207..0000000000
--- a/compat/thrust/random/detail/random_core_access.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-namespace thrust
-{
-
-namespace random
-{
-
-namespace detail
-{
-
-struct random_core_access
-{
-
-template<typename OStream, typename EngineOrDistribution>
-static OStream &stream_out(OStream &os, const EngineOrDistribution &x)
-{
-  return x.stream_out(os);
-}
-
-template<typename IStream, typename EngineOrDistribution>
-static IStream &stream_in(IStream &is, EngineOrDistribution &x)
-{
-  return x.stream_in(is);
-}
-
-template<typename EngineOrDistribution>
-__host__ __device__
-static bool equal(const EngineOrDistribution &lhs, const EngineOrDistribution &rhs)
-{
-  return lhs.equal(rhs);
-}
-
-}; // end random_core_access
-
-} // end detail
-
-} // end random
-
-} // end thrust
-
diff --git a/compat/thrust/random/detail/subtract_with_carry_engine.inl b/compat/thrust/random/detail/subtract_with_carry_engine.inl
deleted file mode 100644
index a58b2665b2..0000000000
--- a/compat/thrust/random/detail/subtract_with_carry_engine.inl
+++ /dev/null
@@ -1,203 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/random/linear_congruential_engine.h>
-#include <thrust/random/subtract_with_carry_engine.h>
-#include <thrust/random/detail/mod.h>
-#include <thrust/random/detail/random_core_access.h>
-
-namespace thrust
-{
-
-namespace random
-{
-
-
-template<typename UIntType, size_t w, size_t s, size_t r>
-  subtract_with_carry_engine<UIntType,w,s,r>
-    ::subtract_with_carry_engine(result_type value)
-{
-  seed(value);
-} // end subtract_with_carry_engine::subtract_with_carry_engine()
-
-
-template<typename UIntType, size_t w, size_t s, size_t r>
-  void subtract_with_carry_engine<UIntType,w,s,r>
-    ::seed(result_type value)
-{
-  thrust::random::linear_congruential_engine<result_type,
-    40014u, 0u, 2147483563u> e(value == 0u ? default_seed : value);
-
-  // initialize state
-  for(size_t i = 0; i < long_lag; ++i)
-  {
-    m_x[i] = detail::mod<UIntType, 1, 0, modulus>(e());
-  } // end for i
-
-  m_carry = (m_x[long_lag-1] == 0);
-  m_k = 0;
-} // end subtract_with_carry_engine::seed()
-
-
-template<typename UIntType, size_t w, size_t s, size_t r>
-  typename subtract_with_carry_engine<UIntType,w,s,r>::result_type
-    subtract_with_carry_engine<UIntType,w,s,r>
-      ::operator()(void)
-{
-  // XXX we probably need to cache these m_x[m_k] in a register
-  //     maybe we need to cache the use of all member variables
-  int short_index = m_k - short_lag;
-  if(short_index < 0)
-    short_index += long_lag;
-  result_type xi;
-  if (m_x[short_index] >= m_x[m_k] + m_carry)
-  {
-    // x(n) >= 0
-    xi =  m_x[short_index] - m_x[m_k] - m_carry;
-    m_carry = 0;
-  }
-  else
-  {
-    // x(n) < 0
-    xi = modulus - m_x[m_k] - m_carry + m_x[short_index];
-    m_carry = 1;
-  }
-  m_x[m_k] = xi;
-  ++m_k;
-  if(m_k >= long_lag)
-    m_k = 0;
-  return xi;
-} // end subtract_with_carry_engine::operator()()
-
-
-template<typename UIntType, size_t w, size_t s, size_t r>
-  void subtract_with_carry_engine<UIntType,w,s,r>
-    ::discard(unsigned long long z)
-{
-  for(; z > 0; --z)
-  {
-    this->operator()();
-  } // end for
-} // end subtract_with_carry_engine::discard()
-
-
-template<typename UIntType, size_t w, size_t s, size_t r>
-  template<typename CharT, typename Traits>
-    std::basic_ostream<CharT,Traits>& subtract_with_carry_engine<UIntType,w,s,r>
-      ::stream_out(std::basic_ostream<CharT,Traits> &os) const
-{
-  typedef std::basic_ostream<CharT,Traits> ostream_type;
-  typedef typename ostream_type::ios_base     ios_base;
-                  
-  const typename ios_base::fmtflags flags = os.flags();
-  const CharT fill  = os.fill();
-  const CharT space = os.widen(' ');
-  os.flags(ios_base::dec | ios_base::fixed | ios_base::left);
-  os.fill(space);
-
-  const UIntType long_lag = r;
-                                                          
-  for(size_t i = 0; i < r; ++i)
-    os << m_x[(i + m_k) % long_lag] << space;
-  os << m_carry;
-                                                                          
-  os.flags(flags);
-  os.fill(fill);
-  return os;
-}
-
-
-template<typename UIntType, size_t w, size_t s, size_t r>
-  template<typename CharType, typename Traits>
-    std::basic_istream<CharType,Traits>& subtract_with_carry_engine<UIntType,w,s,r>
-      ::stream_in(std::basic_istream<CharType,Traits> &is)
-{
-  typedef std::basic_istream<CharType,Traits> istream_type;
-  typedef typename istream_type::ios_base     ios_base;
-
-  const typename ios_base::fmtflags flags = is.flags();
-  is.flags(ios_base::dec | ios_base::skipws);
-
-  for(size_t i = 0; i < r; ++i)
-    is >> m_x[i];
-  is >> m_carry;
-
-  m_k = 0;
-
-  is.flags(flags);
-  return is;
-}
-
-
-template<typename UIntType, size_t w, size_t s, size_t r>
-  bool subtract_with_carry_engine<UIntType,w,s,r>
-    ::equal(const subtract_with_carry_engine<UIntType,w,s,r> &rhs) const
-{
-  const UIntType long_lag = r;
-
-  bool result = true;
-  for(size_t i = 0; i < r; ++i)
-  {
-    result &= (m_x[(i + m_k) % long_lag] == rhs.m_x[(i + rhs.m_k) % long_lag]);
-  }
-
-  // XXX not sure if this last check is necessary
-  result &= (m_carry == rhs.m_carry);
-
-  return result;
-}
-
-
-template<typename UIntType, size_t w, size_t s, size_t r,
-         typename CharT, typename Traits>
-  std::basic_ostream<CharT,Traits>&
-    operator<<(std::basic_ostream<CharT,Traits> &os,
-               const subtract_with_carry_engine<UIntType,w,s,r> &e)
-{
-  return thrust::random::detail::random_core_access::stream_out(os,e);
-}
-
-
-template<typename UIntType, size_t w, size_t s, size_t r,
-         typename CharType, typename Traits>
-  std::basic_istream<CharType,Traits>&
-    operator>>(std::basic_istream<CharType,Traits> &is,
-               subtract_with_carry_engine<UIntType,w,s,r> &e)
-{
-  return thrust::random::detail::random_core_access::stream_in(is,e);
-}
-
-
-template<typename UIntType, size_t w, size_t s, size_t r>
-  bool operator==(const subtract_with_carry_engine<UIntType,w,s,r> &lhs,
-                  const subtract_with_carry_engine<UIntType,w,s,r> &rhs)
-{
-  return thrust::random::detail::random_core_access::equal(lhs,rhs);
-}
-
-
-template<typename UIntType, size_t w, size_t s, size_t r>
-  bool operator!=(const subtract_with_carry_engine<UIntType,w,s,r> &lhs,
-                  const subtract_with_carry_engine<UIntType,w,s,r> &rhs)
-{
-  return !(lhs == rhs);
-}
-
-
-} // end random
-
-} // end thrust
-
diff --git a/compat/thrust/random/detail/uniform_int_distribution.inl b/compat/thrust/random/detail/uniform_int_distribution.inl
deleted file mode 100644
index e92754c5e7..0000000000
--- a/compat/thrust/random/detail/uniform_int_distribution.inl
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/random/uniform_int_distribution.h>
-#include <thrust/random/uniform_real_distribution.h>
-#include <thrust/detail/type_traits.h>
-
-namespace thrust
-{
-
-namespace random
-{
-
-
-template<typename IntType>
-  uniform_int_distribution<IntType>
-    ::uniform_int_distribution(IntType a, IntType b)
-      :m_param(a,b)
-{
-} // end uniform_int_distribution::uniform_int_distribution()
-
-
-template<typename IntType>
-  uniform_int_distribution<IntType>
-    ::uniform_int_distribution(const param_type &parm)
-      :m_param(parm)
-{
-} // end uniform_int_distribution::uniform_int_distribution()
-
-
-template<typename IntType>
-  void uniform_int_distribution<IntType>
-    ::reset(void)
-{
-} // end uniform_int_distribution::reset()
-
-
-template<typename IntType>
-  template<typename UniformRandomNumberGenerator>
-    typename uniform_int_distribution<IntType>::result_type
-      uniform_int_distribution<IntType>
-        ::operator()(UniformRandomNumberGenerator &urng)
-{
-  return operator()(urng, m_param);
-} // end uniform_int_distribution::operator()()
-
-
-template<typename IntType>
-  template<typename UniformRandomNumberGenerator>
-    typename uniform_int_distribution<IntType>::result_type
-      uniform_int_distribution<IntType>
-        ::operator()(UniformRandomNumberGenerator &urng, const param_type &parm)
-{
-  // XXX this implementation is somewhat hacky and will skip
-  //     values if the range of the RNG is smaller than the range of the distribution
-  //     we should improve this implementation in a later version
-
-  typedef typename thrust::detail::largest_available_float::type float_type;
-
-  const float_type real_min(parm.first);
-  const float_type real_max(parm.second);
-
-  // add one to the right end of the interval because it is half-open
-  // XXX adding 1.0 to a potentially large floating point number seems like a bad idea
-  uniform_real_distribution<float_type> real_dist(real_min, real_max + float_type(1));
-
-  return static_cast<result_type>(real_dist(urng));
-} // end uniform_int_distribution::operator()()
-
-
-template<typename IntType>
-  typename uniform_int_distribution<IntType>::result_type
-    uniform_int_distribution<IntType>
-      ::a(void) const
-{
-  return m_param.first;
-} // end uniform_int_distribution<IntType>::a()
-
-
-template<typename IntType>
-  typename uniform_int_distribution<IntType>::result_type
-    uniform_int_distribution<IntType>
-      ::b(void) const
-{
-  return m_param.second;
-} // end uniform_int_distribution::b()
-
-
-template<typename IntType>
-  typename uniform_int_distribution<IntType>::param_type
-    uniform_int_distribution<IntType>
-      ::param(void) const
-{
-  return m_param;
-} // end uniform_int_distribution::param()
-
-
-template<typename IntType>
-  void uniform_int_distribution<IntType>
-    ::param(const param_type &parm)
-{
-  m_param = parm;
-} // end uniform_int_distribution::param()
-
-
-template<typename IntType>
-  typename uniform_int_distribution<IntType>::result_type
-    uniform_int_distribution<IntType>
-      ::min THRUST_PREVENT_MACRO_SUBSTITUTION (void) const
-{
-  return a();
-} // end uniform_int_distribution::min()
-
-
-template<typename IntType>
-  typename uniform_int_distribution<IntType>::result_type
-    uniform_int_distribution<IntType>
-      ::max THRUST_PREVENT_MACRO_SUBSTITUTION (void) const
-{
-  return b();
-} // end uniform_int_distribution::max()
-
-
-template<typename IntType>
-  bool uniform_int_distribution<IntType>
-    ::equal(const uniform_int_distribution &rhs) const
-{
-  return param() == rhs.param();
-}
-
-
-template<typename IntType>
-  template<typename CharT, typename Traits>
-    std::basic_ostream<CharT,Traits>&
-      uniform_int_distribution<IntType>
-        ::stream_out(std::basic_ostream<CharT,Traits> &os) const
-{
-  typedef std::basic_ostream<CharT,Traits> ostream_type;
-  typedef typename ostream_type::ios_base  ios_base;
-
-  // save old flags and fill character
-  const typename ios_base::fmtflags flags = os.flags();
-  const CharT fill = os.fill();
-
-  const CharT space = os.widen(' ');
-  os.flags(ios_base::dec | ios_base::fixed | ios_base::left);
-  os.fill(space);
-
-  os << a() << space << b();
-
-  // restore old flags and fill character
-  os.flags(flags);
-  os.fill(fill);
-  return os;
-}
-
-
-template<typename IntType>
-  template<typename CharT, typename Traits>
-    std::basic_istream<CharT,Traits>&
-      uniform_int_distribution<IntType>
-        ::stream_in(std::basic_istream<CharT,Traits> &is)
-{
-  typedef std::basic_istream<CharT,Traits> istream_type;
-  typedef typename istream_type::ios_base  ios_base;
-
-  // save old flags
-  const typename ios_base::fmtflags flags = is.flags();
-
-  is.flags(ios_base::skipws);
-
-  is >> m_param.first >> m_param.second;
-
-  // restore old flags
-  is.flags(flags);
-  return is;
-}
-
-
-template<typename IntType>
-bool operator==(const uniform_int_distribution<IntType> &lhs,
-                const uniform_int_distribution<IntType> &rhs)
-{
-  return thrust::random::detail::random_core_access::equal(lhs,rhs);
-}
-
-
-template<typename IntType>
-bool operator!=(const uniform_int_distribution<IntType> &lhs,
-                const uniform_int_distribution<IntType> &rhs)
-{
-  return !(lhs == rhs);
-}
-
-
-template<typename IntType,
-         typename CharT, typename Traits>
-std::basic_ostream<CharT,Traits>&
-operator<<(std::basic_ostream<CharT,Traits> &os,
-           const uniform_int_distribution<IntType> &d)
-{
-  return thrust::random::detail::random_core_access::stream_out(os,d);
-}
-
-
-template<typename IntType,
-         typename CharT, typename Traits>
-std::basic_istream<CharT,Traits>&
-operator>>(std::basic_istream<CharT,Traits> &is,
-           uniform_int_distribution<IntType> &d)
-{
-  return thrust::random::detail::random_core_access::stream_in(is,d);
-}
-
-
-} // end random
-
-} // end thrust
-
diff --git a/compat/thrust/random/detail/uniform_real_distribution.inl b/compat/thrust/random/detail/uniform_real_distribution.inl
deleted file mode 100644
index 6f6d6b57b5..0000000000
--- a/compat/thrust/random/detail/uniform_real_distribution.inl
+++ /dev/null
@@ -1,217 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/random/uniform_real_distribution.h>
-
-namespace thrust
-{
-
-namespace random
-{
-
-
-template<typename RealType>
-  uniform_real_distribution<RealType>
-    ::uniform_real_distribution(RealType a, RealType b)
-      :m_param(a,b)
-{
-} // end uniform_real_distribution::uniform_real_distribution()
-
-template<typename RealType>
-  uniform_real_distribution<RealType>
-    ::uniform_real_distribution(const param_type &parm)
-      :m_param(parm)
-{
-} // end uniform_real_distribution::uniform_real_distribution()
-
-template<typename RealType>
-  void uniform_real_distribution<RealType>
-    ::reset(void)
-{
-} // end uniform_real_distribution::reset()
-
-template<typename RealType>
-  template<typename UniformRandomNumberGenerator>
-    typename uniform_real_distribution<RealType>::result_type
-      uniform_real_distribution<RealType>
-        ::operator()(UniformRandomNumberGenerator &urng)
-{
-  return operator()(urng, m_param);
-} // end uniform_real::operator()()
-
-template<typename RealType>
-  template<typename UniformRandomNumberGenerator>
-    typename uniform_real_distribution<RealType>::result_type
-      uniform_real_distribution<RealType>
-        ::operator()(UniformRandomNumberGenerator &urng,
-                     const param_type &parm)
-{
-  // call the urng & map its result to [0,1)
-  result_type result = static_cast<result_type>(urng() - UniformRandomNumberGenerator::min);
-
-  // adding one to the denominator ensures that the interval is half-open at 1.0
-  // XXX adding 1.0 to a potentially large floating point number seems like a bad idea
-  // XXX OTOH adding 1 to what is potentially UINT_MAX also seems like a bad idea
-  // XXX we could statically check if 1u + (max - min) is representable and do that, otherwise use the current implementation
-  result /= (result_type(1) + static_cast<result_type>(UniformRandomNumberGenerator::max - UniformRandomNumberGenerator::min));
-
-  return (result * (parm.second - parm.first)) + parm.first;
-} // end uniform_real::operator()()
-
-template<typename RealType>
-  typename uniform_real_distribution<RealType>::result_type
-    uniform_real_distribution<RealType>
-      ::a(void) const
-{
-  return m_param.first;
-} // end uniform_real::a()
-
-template<typename RealType>
-  typename uniform_real_distribution<RealType>::result_type
-    uniform_real_distribution<RealType>
-      ::b(void) const
-{
-  return m_param.second;
-} // end uniform_real_distribution::b()
-
-template<typename RealType>
-  typename uniform_real_distribution<RealType>::param_type
-    uniform_real_distribution<RealType>
-      ::param(void) const
-{
-  return m_param;;
-} // end uniform_real_distribution::param()
-
-template<typename RealType>
-  void uniform_real_distribution<RealType>
-    ::param(const param_type &parm)
-{
-  m_param = parm;
-} // end uniform_real_distribution::param()
-
-template<typename RealType>
-  typename uniform_real_distribution<RealType>::result_type
-    uniform_real_distribution<RealType>
-      ::min THRUST_PREVENT_MACRO_SUBSTITUTION (void) const
-{
-  return a();
-} // end uniform_real_distribution::min()
-
-template<typename RealType>
-  typename uniform_real_distribution<RealType>::result_type
-    uniform_real_distribution<RealType>
-      ::max THRUST_PREVENT_MACRO_SUBSTITUTION (void) const
-{
-  return b();
-} // end uniform_real_distribution::max()
-
-
-template<typename RealType>
-  bool uniform_real_distribution<RealType>
-    ::equal(const uniform_real_distribution &rhs) const
-{
-  return m_param == rhs.param();
-}
-
-
-template<typename RealType>
-  template<typename CharT, typename Traits>
-    std::basic_ostream<CharT,Traits>&
-      uniform_real_distribution<RealType>
-        ::stream_out(std::basic_ostream<CharT,Traits> &os) const
-{
-  typedef std::basic_ostream<CharT,Traits> ostream_type;
-  typedef typename ostream_type::ios_base  ios_base;
-
-  // save old flags and fill character
-  const typename ios_base::fmtflags flags = os.flags();
-  const CharT fill = os.fill();
-
-  const CharT space = os.widen(' ');
-  os.flags(ios_base::dec | ios_base::fixed | ios_base::left);
-  os.fill(space);
-
-  os << a() << space << b();
-
-  // restore old flags and fill character
-  os.flags(flags);
-  os.fill(fill);
-  return os;
-}
-
-
-template<typename RealType>
-  template<typename CharT, typename Traits>
-    std::basic_istream<CharT,Traits>&
-      uniform_real_distribution<RealType>
-        ::stream_in(std::basic_istream<CharT,Traits> &is)
-{
-  typedef std::basic_istream<CharT,Traits> istream_type;
-  typedef typename istream_type::ios_base  ios_base;
-
-  // save old flags
-  const typename ios_base::fmtflags flags = is.flags();
-
-  is.flags(ios_base::skipws);
-
-  is >> m_param.first >> m_param.second;
-
-  // restore old flags
-  is.flags(flags);
-  return is;
-}
-
-
-template<typename RealType>
-bool operator==(const uniform_real_distribution<RealType> &lhs,
-                const uniform_real_distribution<RealType> &rhs)
-{
-  return thrust::random::detail::random_core_access::equal(lhs,rhs);
-}
-
-
-template<typename RealType>
-bool operator!=(const uniform_real_distribution<RealType> &lhs,
-                const uniform_real_distribution<RealType> &rhs)
-{
-  return !(lhs == rhs);
-}
-
-
-template<typename RealType,
-         typename CharT, typename Traits>
-std::basic_ostream<CharT,Traits>&
-operator<<(std::basic_ostream<CharT,Traits> &os,
-           const uniform_real_distribution<RealType> &d)
-{
-  return thrust::random::detail::random_core_access::stream_out(os,d);
-}
-
-
-template<typename RealType,
-         typename CharT, typename Traits>
-std::basic_istream<CharT,Traits>&
-operator>>(std::basic_istream<CharT,Traits> &is,
-           uniform_real_distribution<RealType> &d)
-{
-  return thrust::random::detail::random_core_access::stream_in(is,d);
-}
-
-
-} // end random
-
-} // end thrust
-
diff --git a/compat/thrust/random/detail/xor_combine_engine.inl b/compat/thrust/random/detail/xor_combine_engine.inl
deleted file mode 100644
index b138722f8b..0000000000
--- a/compat/thrust/random/detail/xor_combine_engine.inl
+++ /dev/null
@@ -1,203 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/random/xor_combine_engine.h>
-#include <thrust/random/detail/random_core_access.h>
-
-namespace thrust
-{
-
-namespace random
-{
-
-template<typename Engine1, size_t s1,
-         typename Engine2, size_t s2>
-  xor_combine_engine<Engine1,s1,Engine2,s2>
-    ::xor_combine_engine(void)
-      :m_b1(),m_b2()
-{
-} // end xor_combine_engine::xor_combine_engine()
-
-template<typename Engine1, size_t s1,
-         typename Engine2, size_t s2>
-  xor_combine_engine<Engine1,s1,Engine2,s2>
-    ::xor_combine_engine(const base1_type &urng1, const base2_type &urng2)
-      :m_b1(urng1),m_b2(urng2)
-{
-} // end xor_combine_engine::xor_combine_engine()
-
-template<typename Engine1, size_t s1,
-         typename Engine2, size_t s2>
-  xor_combine_engine<Engine1,s1,Engine2,s2>
-    ::xor_combine_engine(result_type s)
-      :m_b1(s),m_b2(s)
-{
-} // end xor_combine_engine::xor_combine_engine()
-
-template<typename Engine1, size_t s1,
-         typename Engine2, size_t s2>
-  void xor_combine_engine<Engine1,s1,Engine2,s2>
-    ::seed(void)
-{
-  m_b1.seed();
-  m_b2.seed();
-} // end xor_combine_engine::seed()
-
-template<typename Engine1, size_t s1,
-         typename Engine2, size_t s2>
-  void xor_combine_engine<Engine1,s1,Engine2,s2>
-    ::seed(result_type s)
-{
-  m_b1.seed(s);
-  m_b2.seed(s);
-} // end xor_combine_engine::seed()
-
-template<typename Engine1, size_t s1,
-         typename Engine2, size_t s2>
-  const typename xor_combine_engine<Engine1,s1,Engine2,s2>::base1_type &
-    xor_combine_engine<Engine1,s1,Engine2,s2>
-      ::base1(void) const
-{
-  return m_b1;
-} // end xor_combine_engine::base1()
-
-template<typename Engine1, size_t s1,
-         typename Engine2, size_t s2>
-  const typename xor_combine_engine<Engine1,s1,Engine2,s2>::base2_type &
-    xor_combine_engine<Engine1,s1,Engine2,s2>
-      ::base2(void) const
-{
-  return m_b2;
-} // end xor_combine_engine::base2()
-
-template<typename Engine1, size_t s1,
-         typename Engine2, size_t s2>
-  typename xor_combine_engine<Engine1,s1,Engine2,s2>::result_type
-    xor_combine_engine<Engine1,s1,Engine2,s2>
-      ::operator()(void)
-{
-  return (result_type(m_b1() - base1_type::min) << shift1) ^
-         (result_type(m_b2() - base2_type::min) << shift2);
-} // end xor_combine_engine::operator()()
-
-template<typename Engine1, size_t s1,
-         typename Engine2, size_t s2>
-  void xor_combine_engine<Engine1, s1, Engine2, s2>
-    ::discard(unsigned long long z)
-{
-  for(; z > 0; --z)
-  {
-    this->operator()();
-  } // end for
-} // end xor_combine_engine::discard()
-
-
-template<typename Engine1, size_t s1, typename Engine2, size_t s2>
-  template<typename CharT, typename Traits>
-    std::basic_ostream<CharT,Traits>& xor_combine_engine<Engine1,s1,Engine2,s2>
-      ::stream_out(std::basic_ostream<CharT,Traits> &os) const
-{
-  typedef std::basic_ostream<CharT,Traits> ostream_type;
-  typedef typename ostream_type::ios_base  ios_base;
-
-  // save old flags and fill character
-  const typename ios_base::fmtflags flags = os.flags();
-  const CharT fill = os.fill();
-
-  const CharT space = os.widen(' ');
-  os.flags(ios_base::dec | ios_base::fixed | ios_base::left);
-  os.fill(space);
-
-  // output each base engine in turn
-  os << base1() << space << base2();
-
-  // restore old flags and fill character
-  os.flags(flags);
-  os.fill(fill);
-  return os;
-}
-
-
-template<typename Engine1, size_t s1, typename Engine2, size_t s2>
-  template<typename CharT, typename Traits>
-    std::basic_istream<CharT,Traits>& xor_combine_engine<Engine1,s1,Engine2,s2>
-      ::stream_in(std::basic_istream<CharT,Traits> &is)
-{
-  typedef std::basic_istream<CharT,Traits> istream_type;
-  typedef typename istream_type::ios_base  ios_base;
-
-  // save old flags
-  const typename ios_base::fmtflags flags = is.flags();
-
-  is.flags(ios_base::skipws);
-
-  // input each base engine in turn
-  is >> m_b1 >> m_b2;
-
-  // restore old flags
-  is.flags(flags);
-  return is;
-}
-
-
-template<typename Engine1, size_t s1, typename Engine2, size_t s2>
-  bool xor_combine_engine<Engine1,s1,Engine2,s2>
-    ::equal(const xor_combine_engine<Engine1,s1,Engine2,s2> &rhs) const
-{
-  return (m_b1 == rhs.m_b1) && (m_b2 == rhs.m_b2);
-}
-
-
-template<typename Engine1, size_t s1, typename Engine2, size_t s2,
-         typename CharT, typename Traits>
-std::basic_ostream<CharT,Traits>&
-operator<<(std::basic_ostream<CharT,Traits> &os,
-           const xor_combine_engine<Engine1,s1,Engine2,s2> &e)
-{
-  return thrust::random::detail::random_core_access::stream_out(os,e);
-}
-
-
-template<typename Engine1, size_t s1, typename Engine2, size_t s2,
-         typename CharT, typename Traits>
-std::basic_istream<CharT,Traits>&
-operator>>(std::basic_istream<CharT,Traits> &is,
-           xor_combine_engine<Engine1,s1,Engine2,s2> &e)
-{
-  return thrust::random::detail::random_core_access::stream_in(is,e);
-}
-
-
-template<typename Engine1, size_t s1, typename Engine2, size_t s2>
-bool operator==(const xor_combine_engine<Engine1,s1,Engine2,s2> &lhs,
-                const xor_combine_engine<Engine1,s1,Engine2,s2> &rhs)
-{
-  return thrust::random::detail::random_core_access::equal(lhs,rhs);
-}
-
-
-template<typename Engine1, size_t s1, typename Engine2, size_t s2>
-bool operator!=(const xor_combine_engine<Engine1,s1,Engine2,s2> &lhs,
-                const xor_combine_engine<Engine1,s1,Engine2,s2> &rhs)
-{
-  return !(lhs == rhs);
-}
-
-
-} // end random
-
-} // end thrust
-
diff --git a/compat/thrust/random/detail/xor_combine_engine_max.h b/compat/thrust/random/detail/xor_combine_engine_max.h
deleted file mode 100644
index 8bad9a462c..0000000000
--- a/compat/thrust/random/detail/xor_combine_engine_max.h
+++ /dev/null
@@ -1,324 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/type_traits.h>
-#include <thrust/detail/mpl/math.h>
-#include <limits>
-#include <cstddef>
-
-namespace thrust
-{
-
-namespace random
-{
-
-namespace detail
-{
-
-
-namespace math = thrust::detail::mpl::math;
-
-
-namespace detail
-{
-
-// two cases for this function avoids compile-time warnings of overflow
-template<typename UIntType, UIntType w,
-         UIntType lhs, UIntType rhs,
-         bool shift_will_overflow>
-  struct lshift_w
-{
-  static const UIntType value = 0;
-};
-
-
-template<typename UIntType, UIntType w,
-         UIntType lhs, UIntType rhs>
-  struct lshift_w<UIntType,w,lhs,rhs,false>
-{
-  static const UIntType value = lhs << rhs;
-};
-
-} // end detail
-
-
-template<typename UIntType, UIntType w,
-         UIntType lhs, UIntType rhs>
-  struct lshift_w
-{
-  static const bool shift_will_overflow = rhs >= w;
-
-  static const UIntType value = detail::lshift_w<UIntType, w, lhs, rhs, shift_will_overflow>::value;
-};
-
-
-template<typename UIntType, UIntType lhs, UIntType rhs>
-  struct lshift
-    : lshift_w<UIntType, std::numeric_limits<UIntType>::digits, lhs, rhs>
-{};
-
-
-template<typename UIntType, int p>
-  struct two_to_the_power
-    : lshift<UIntType, 1, p>
-{};
-
-
-template<typename result_type, result_type a, result_type b, int d>
-  class xor_combine_engine_max_aux_constants
-{
-  public:
-    static const result_type two_to_the_d = two_to_the_power<result_type, d>::value;
-    static const result_type c = lshift<result_type, a, d>::value;
-
-    static const result_type t =
-      math::max<
-        result_type,
-        c,
-        b
-      >::value;
-
-    static const result_type u =
-      math::min<
-        result_type,
-        c,
-        b
-      >::value;
-
-    static const result_type p            = math::log2<u>::value;
-    static const result_type two_to_the_p = two_to_the_power<result_type, p>::value;
-
-    static const result_type k = math::div<result_type, t, two_to_the_p>::value;
-};
-
-
-template<typename result_type, result_type, result_type, int> struct xor_combine_engine_max_aux;
-
-
-template<typename result_type, result_type a, result_type b, int d>
-  struct xor_combine_engine_max_aux_case4
-{
-  typedef xor_combine_engine_max_aux_constants<result_type,a,b,d> constants;
-
-  static const result_type k_plus_1_times_two_to_the_p =
-    lshift<
-      result_type,
-      math::plus<result_type,constants::k,1>::value,
-      constants::p
-    >::value;
-
-  static const result_type M =
-    xor_combine_engine_max_aux<
-      result_type,
-      math::div<
-        result_type,
-        math::mod<
-          result_type,
-          constants::u,
-          constants::two_to_the_p
-        >::value,
-        constants::two_to_the_p
-      >::value,
-      math::mod<
-        result_type,
-        constants::t,
-        constants::two_to_the_p
-      >::value,
-      d
-    >::value;
-
-  static const result_type value = math::plus<result_type, k_plus_1_times_two_to_the_p, M>::value;
-};
-
-
-template<typename result_type, result_type a, result_type b, int d>
-  struct xor_combine_engine_max_aux_case3
-{
-  typedef xor_combine_engine_max_aux_constants<result_type,a,b,d> constants;
-
-  static const result_type k_plus_1_times_two_to_the_p =
-    lshift<
-      result_type,
-      math::plus<result_type,constants::k,1>::value,
-      constants::p
-    >::value;
-
-  static const result_type M =
-    xor_combine_engine_max_aux<
-      result_type,
-      math::div<
-        result_type,
-        math::mod<
-          result_type,
-          constants::t,
-          constants::two_to_the_p
-        >::value,
-        constants::two_to_the_p
-      >::value,
-      math::mod<
-        result_type,
-        constants::u,
-        constants::two_to_the_p
-      >::value,
-      d
-    >::value;
-
-  static const result_type value = math::plus<result_type, k_plus_1_times_two_to_the_p, M>::value;
-};
-
-
-
-template<typename result_type, result_type a, result_type b, int d>
-  struct xor_combine_engine_max_aux_case2
-{
-  typedef xor_combine_engine_max_aux_constants<result_type,a,b,d> constants;
-
-  static const result_type k_plus_1_times_two_to_the_p =
-    lshift<
-      result_type,
-      math::plus<result_type,constants::k,1>::value,
-      constants::p
-    >::value;
-
-  static const result_type value =
-    math::minus<
-      result_type,
-      k_plus_1_times_two_to_the_p,
-      1
-    >::value;
-};
-
-
-template<typename result_type, result_type a, result_type b, int d>
-  struct xor_combine_engine_max_aux_case1
-{
-  static const result_type c     = lshift<result_type, a, d>::value;
-
-  static const result_type value = math::plus<result_type,c,b>::value;
-};
-
-
-template<typename result_type, result_type a, result_type b, int d>
-  struct xor_combine_engine_max_aux_2
-{
-  typedef xor_combine_engine_max_aux_constants<result_type,a,b,d> constants;
-
-  static const result_type value = 
-    thrust::detail::eval_if<
-      // if k is odd...
-      math::is_odd<result_type, constants::k>::value,
-      thrust::detail::identity_<
-        thrust::detail::integral_constant<
-          result_type,
-          xor_combine_engine_max_aux_case2<result_type,a,b,d>::value
-        >
-      >,
-      thrust::detail::eval_if<
-        // otherwise if a * 2^3 >= b, then case 3
-        a * constants::two_to_the_d >= b,
-        thrust::detail::identity_<
-          thrust::detail::integral_constant<
-            result_type,
-            xor_combine_engine_max_aux_case3<result_type,a,b,d>::value
-          >
-        >,
-        // otherwise, case 4
-        thrust::detail::identity_<
-          thrust::detail::integral_constant<
-            result_type,
-            xor_combine_engine_max_aux_case4<result_type,a,b,d>::value
-          >
-        >
-      >
-    >::type::value;
-};
-
-
-template<typename result_type,
-         result_type a,
-         result_type b,
-         int d,
-         bool use_case1 = (a == 0) || (b < two_to_the_power<result_type,d>::value)>
-  struct xor_combine_engine_max_aux_1
-    : xor_combine_engine_max_aux_case1<result_type,a,b,d>
-{};
-
-
-template<typename result_type,
-         result_type a,
-         result_type b,
-         int d>
-  struct xor_combine_engine_max_aux_1<result_type,a,b,d,false>
-    : xor_combine_engine_max_aux_2<result_type,a,b,d>
-{};
-
-
-template<typename result_type,
-         result_type a,
-         result_type b,
-         int d>
-  struct xor_combine_engine_max_aux
-    : xor_combine_engine_max_aux_1<result_type,a,b,d>
-{};
-
-
-template<typename Engine1, size_t s1, typename Engine2, size_t s2, typename result_type>
-  struct xor_combine_engine_max
-{
-  static const size_t w = std::numeric_limits<result_type>::digits;
-
-  static const result_type m1 =
-    math::min<
-      result_type,
-      result_type(Engine1::max - Engine1::min),
-      two_to_the_power<result_type, w-s1>::value - 1 
-    >::value;
-
-  static const result_type m2 =
-    math::min<
-      result_type,
-      result_type(Engine2::max - Engine2::min),
-      two_to_the_power<result_type, w-s2>::value - 1
-    >::value;
-
-  static const result_type s = s1 - s2;
-
-  static const result_type M =
-    xor_combine_engine_max_aux<
-      result_type,
-      m1,
-      m2,
-      s
-    >::value;
-
-  // the value is M(m1,m2,s) lshift_w s2
-  static const result_type value =
-    lshift_w<
-      result_type,
-      w,
-      M,
-      s2
-    >::value;
-}; // end xor_combine_engine_max
-
-} // end detail
-
-} // end random
-
-} // end thrust
-
diff --git a/compat/thrust/random/discard_block_engine.h b/compat/thrust/random/discard_block_engine.h
deleted file mode 100644
index c902c5863b..0000000000
--- a/compat/thrust/random/discard_block_engine.h
+++ /dev/null
@@ -1,252 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file discard_block_engine.h
- *  \brief A random number engine which adapts a base engine and produces
- *         numbers by discarding all but a contiguous blocks of its values.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-#include <thrust/detail/config.h>
-#include <iostream>
-#include <thrust/detail/cstdint.h>
-#include <thrust/random/detail/random_core_access.h>
-
-namespace thrust
-{
-
-namespace random
-{
-
-/*! \addtogroup random_number_engine_adaptors Random Number Engine Adaptor Class Templates
- *  \ingroup random
- *  \{
- */
-
-/*! \class discard_block_engine
- *  \brief A \p discard_block_engine adapts an existing base random number engine and produces
- *         random values by discarding some of the values returned by its base engine.
- *         Each cycle of the compound engine begins by returning \c r values successively produced
- *         by the base engine and ends by discarding <tt>p-r</tt> such values. The engine's state
- *         is the state of its base engine followed by the number of calls to <tt>operator()</tt>
- *         that have occurred since the beginning of the current cycle.
- *
- *  \tparam Engine The type of the base random number engine to adapt.
- *  \tparam p The discard cycle length.
- *  \tparam r The number of values to return of the base engine. Because <tt>p-r</tt> will be
- *            discarded, <tt>r <= p</tt>.
- *
- *  The following code snippet shows an example of using a \p discard_block_engine instance:
- *
- *  \code
- *  #include <thrust/random/linear_congruential_engine.h>
- *  #include <thrust/random/discard_block_engine.h>
- *  #include <iostream>
- *
- *  int main(void)
- *  {
- *    // create a discard_block_engine from minstd_rand, with a cycle length of 13
- *    // keep every first 10 values, and discard the next 3
- *    thrust::discard_block_engine<thrust::minstd_rand, 13, 10> rng;
- *
- *    // print a random number to standard output
- *    std::cout << rng() << std::endl;
- *
- *    return 0;
- *  }
- *  \endcode
- */         
-template<typename Engine, size_t p, size_t r>
-  class discard_block_engine
-{
-  public:
-    // types
-
-    /*! \typedef base_type
-     *  \brief The type of the adapted base random number engine.
-     */
-    typedef Engine base_type;
-
-    /*! \typedef result_type
-     *  \brief The type of the unsigned integer produced by this \p linear_congruential_engine.
-     */
-    typedef typename base_type::result_type result_type;
-
-    // engine characteristics
-
-    /*! The length of the production cycle.
-     */
-    static const size_t block_size = p;
-
-    /*! The number of used numbers per production cycle.
-     */
-    static const size_t used_block = r;
-
-    /*! The smallest value this \p discard_block_engine may potentially produce.
-     */
-    static const result_type min = base_type::min;
-
-    /*! The largest value this \p discard_block_engine may potentially produce.
-     */
-    static const result_type max = base_type::max;
-
-    // constructors and seeding functions
-
-    /*! This constructor constructs a new \p discard_block_engine and constructs
-     *  its \p base_type engine using its null constructor.
-     */
-    __host__ __device__
-    discard_block_engine();
-
-    /*! This constructor constructs a new \p discard_block_engine using
-     *  a given \p base_type engine to initialize its adapted base engine.
-     *
-     *  \param urng A \p base_type to use to initialize this \p discard_block_engine's
-     *         adapted base engine.
-     */
-    __host__ __device__
-    explicit discard_block_engine(const base_type &urng);
-
-    /*! This constructor initializes a new \p discard_block_engine with a given seed.
-     *  
-     *  \param s The seed used to intialize this \p discard_block_engine's adapted base engine.
-     */
-    __host__ __device__
-    explicit discard_block_engine(result_type s);
-
-    /*! This method initializes the state of this \p discard_block_engine's adapted base engine
-     *  by using its \p default_seed value.
-     */
-    __host__ __device__
-    void seed(void);
-
-    /*! This method initializes the state of this \p discard_block_engine's adapted base engine
-     *  by using the given seed.
-     *
-     *  \param s The seed with which to intialize this \p discard_block_engine's adapted base engine.
-     */
-    __host__ __device__
-    void seed(result_type s);
-
-    // generating functions
-    
-    /*! This member function produces a new random value and updates this \p discard_block_engine's state.
-     *  \return A new random number.
-     */
-    __host__ __device__
-    result_type operator()(void);
-
-    /*! This member function advances this \p discard_block_engine's state a given number of times
-     *  and discards the results.
-     *
-     *  \param z The number of random values to discard.
-     *  \note This function is provided because an implementation may be able to accelerate it.
-     */
-    __host__ __device__
-    void discard(unsigned long long z);
-
-    // property functions
-
-    /*! This member function returns a const reference to this \p discard_block_engine's
-     *  adapted base engine.
-     *
-     *  \return A const reference to the base engine this \p discard_block_engine adapts.
-     */
-    __host__ __device__
-    const base_type &base(void) const;
-
-    /*! \cond
-     */
-  private:
-    base_type m_e;
-    unsigned int m_n;
-
-    friend struct thrust::random::detail::random_core_access;
-
-    __host__ __device__
-    bool equal(const discard_block_engine &rhs) const;
-
-    template<typename CharT, typename Traits>
-    std::basic_ostream<CharT,Traits>& stream_out(std::basic_ostream<CharT,Traits> &os) const;
-
-    template<typename CharT, typename Traits>
-    std::basic_istream<CharT,Traits>& stream_in(std::basic_istream<CharT,Traits> &is);
-    /*! \endcond
-     */
-}; // end discard_block_engine
-
-
-/*! This function checks two \p discard_block_engines for equality.
- *  \param lhs The first \p discard_block_engine to test.
- *  \param rhs The second \p discard_block_engine to test.
- *  \return \c true if \p lhs is equal to \p rhs; \c false, otherwise.
- */
-template<typename Engine, size_t p, size_t r>
-__host__ __device__
-bool operator==(const discard_block_engine<Engine,p,r> &lhs,
-                const discard_block_engine<Engine,p,r> &rhs);
-
-
-/*! This function checks two \p discard_block_engines for inequality.
- *  \param lhs The first \p discard_block_engine to test.
- *  \param rhs The second \p discard_block_engine to test.
- *  \return \c true if \p lhs is not equal to \p rhs; \c false, otherwise.
- */
-template<typename Engine, size_t p, size_t r>
-__host__ __device__
-bool operator!=(const discard_block_engine<Engine,p,r> &lhs,
-                const discard_block_engine<Engine,p,r> &rhs);
-
-
-/*! This function streams a discard_block_engine to a \p std::basic_ostream.
- *  \param os The \p basic_ostream to stream out to.
- *  \param e The \p discard_block_engine to stream out.
- *  \return \p os
- */
-template<typename Engine, size_t p, size_t r,
-         typename CharT, typename Traits>
-std::basic_ostream<CharT,Traits>&
-operator<<(std::basic_ostream<CharT,Traits> &os,
-           const discard_block_engine<Engine,p,r> &e);
-
-
-/*! This function streams a discard_block_engine in from a std::basic_istream.
- *  \param is The \p basic_istream to stream from.
- *  \param e The \p discard_block_engine to stream in.
- *  \return \p is
- */
-template<typename Engine, size_t p, size_t r,
-         typename CharT, typename Traits>
-std::basic_istream<CharT,Traits>&
-operator>>(std::basic_istream<CharT,Traits> &is,
-           discard_block_engine<Engine,p,r> &e);
-
-/*! \} // end random_number_engine_adaptors
- */
-
-} // end random
-
-// import names into thrust::
-using random::discard_block_engine;
-
-} // end thrust
-
-#include <thrust/random/detail/discard_block_engine.inl>
-
diff --git a/compat/thrust/random/linear_congruential_engine.h b/compat/thrust/random/linear_congruential_engine.h
deleted file mode 100644
index 0added0069..0000000000
--- a/compat/thrust/random/linear_congruential_engine.h
+++ /dev/null
@@ -1,295 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file linear_congruential_engine.h
- *  \brief A linear congruential pseudorandom number engine.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <iostream>
-#include <thrust/detail/cstdint.h>
-#include <thrust/random/detail/random_core_access.h>
-#include <thrust/random/detail/linear_congruential_engine_discard.h>
-
-namespace thrust
-{
-
-namespace random
-{
-
-/*! \addtogroup random_number_engine_templates Random Number Engine Class Templates
- *  \ingroup random
- *  \{
- */
-
-/*! \class linear_congruential_engine
- *  \brief A \p linear_congruential_engine random number engine produces unsigned integer
- *         random numbers using a linear congruential random number generation algorithm.
- *
- *         The generation algorithm has the form <tt>x_i = (a * x_{i-1} + c) mod m</tt>.
- *
- *  \tparam UIntType The type of unsigned integer to produce.
- *  \tparam a The multiplier used in the generation algorithm.
- *  \tparam c The increment used in the generation algorithm.
- *  \tparam m The modulus used in the generation algorithm.
- *
- *  \note Inexperienced users should not use this class template directly.  Instead, use
- *  \p minstd_rand or \p minstd_rand0.
- *
- *  The following code snippet shows examples of use of a \p linear_congruential_engine instance:
- *
- *  \code
- *  #include <thrust/random/linear_congruential_engine.h>
- *  #include <iostream>
- *
- *  int main(void)
- *  {
- *    // create a minstd_rand object, which is an instance of linear_congruential_engine
- *    thrust::minstd_rand rng1;
- *
- *    // output some random values to cout
- *    std::cout << rng1() << std::endl;
- *
- *    // a random value is printed
- *
- *    // create a new minstd_rand from a seed
- *    thrust::minstd_rand rng2(13);
- *
- *    // discard some random values
- *    rng2.discard(13);
- *
- *    // stream the object to an iostream
- *    std::cout << rng2 << std::endl;
- *
- *    // rng2's current state is printed
- *
- *    // print the minimum and maximum values that minstd_rand can produce
- *    std::cout << thrust::minstd_rand::min << std::endl;
- *    std::cout << thrust::minstd_rand::max << std::endl;
- *
- *    // the range of minstd_rand is printed
- *
- *    // save the state of rng2 to a different object
- *    thrust::minstd_rand rng3 = rng2;
- *
- *    // compare rng2 and rng3
- *    std::cout << (rng2 == rng3) << std::endl;
- *
- *    // 1 is printed
- *
- *    // re-seed rng2 with a different seed
- *    rng2.seed(7);
- *
- *    // compare rng2 and rng3
- *    std::cout << (rng2 == rng3) << std::endl;
- *
- *    // 0 is printed
- *
- *    return 0;
- *  }
- *
- *  \endcode
- *
- *  \see thrust::random::minstd_rand
- *  \see thrust::random::minstd_rand0
- */
-template<typename UIntType, UIntType a, UIntType c, UIntType m>
-  class linear_congruential_engine
-{
-  public:
-    // types
-    
-    /*! \typedef result_type
-     *  \brief The type of the unsigned integer produced by this \p linear_congruential_engine.
-     */
-    typedef UIntType result_type;
-
-    // engine characteristics
-
-    /*! The multiplier used in the generation algorithm.
-     */
-    static const result_type multiplier = a;
-
-    /*! The increment used in the generation algorithm.
-     */
-    static const result_type increment = c;
-
-    /*! The modulus used in the generation algorithm.
-     */
-    static const result_type modulus = m;
-
-    /*! The smallest value this \p linear_congruential_engine may potentially produce.
-     */
-    static const result_type min = c == 0u ? 1u : 0u;
-
-    /*! The largest value this \p linear_congruential_engine may potentially produce.
-     */
-    static const result_type max = m - 1u;
-
-    /*! The default seed of this \p linear_congruential_engine.
-     */
-    static const result_type default_seed = 1u;
-
-    // constructors and seeding functions
-
-    /*! This constructor, which optionally accepts a seed, initializes a new
-     *  \p linear_congruential_engine.
-     *  
-     *  \param s The seed used to intialize this \p linear_congruential_engine's state.
-     */
-    __host__ __device__
-    explicit linear_congruential_engine(result_type s = default_seed);
-
-    /*! This method initializes this \p linear_congruential_engine's state, and optionally accepts
-     *  a seed value.
-     *
-     *  \param s The seed used to initializes this \p linear_congruential_engine's state.
-     */
-    __host__ __device__
-    void seed(result_type s = default_seed);
-
-    // generating functions
-
-    /*! This member function produces a new random value and updates this \p linear_congruential_engine's state.
-     *  \return A new random number.
-     */
-    __host__ __device__
-    result_type operator()(void);
-
-    /*! This member function advances this \p linear_congruential_engine's state a given number of times
-     *  and discards the results.
-     *
-     *  \param z The number of random values to discard.
-     *  \note This function is provided because an implementation may be able to accelerate it.
-     */
-    __host__ __device__
-    void discard(unsigned long long z);
-
-    /*! \cond
-     */
-  private:
-    result_type m_x;
-
-    static void transition(result_type &state);
-
-    friend struct thrust::random::detail::random_core_access;
-
-    friend struct thrust::random::detail::linear_congruential_engine_discard;
-
-    __host__ __device__
-    bool equal(const linear_congruential_engine &rhs) const;
-
-    template<typename CharT, typename Traits>
-    std::basic_ostream<CharT,Traits>& stream_out(std::basic_ostream<CharT,Traits> &os) const;
-
-    template<typename CharT, typename Traits>
-    std::basic_istream<CharT,Traits>& stream_in(std::basic_istream<CharT,Traits> &is);
-
-    /*! \endcond
-     */
-}; // end linear_congruential_engine
-
-
-/*! This function checks two \p linear_congruential_engines for equality.
- *  \param lhs The first \p linear_congruential_engine to test.
- *  \param rhs The second \p linear_congruential_engine to test.
- *  \return \c true if \p lhs is equal to \p rhs; \c false, otherwise.
- */
-template<typename UIntType_, UIntType_ a_, UIntType_ c_, UIntType_ m_>
-__host__ __device__
-bool operator==(const linear_congruential_engine<UIntType_,a_,c_,m_> &lhs,
-                const linear_congruential_engine<UIntType_,a_,c_,m_> &rhs);
-
-
-/*! This function checks two \p linear_congruential_engines for inequality.
- *  \param lhs The first \p linear_congruential_engine to test.
- *  \param rhs The second \p linear_congruential_engine to test.
- *  \return \c true if \p lhs is not equal to \p rhs; \c false, otherwise.
- */
-template<typename UIntType_, UIntType_ a_, UIntType_ c_, UIntType_ m_>
-__host__ __device__
-bool operator!=(const linear_congruential_engine<UIntType_,a_,c_,m_> &lhs,
-                const linear_congruential_engine<UIntType_,a_,c_,m_> &rhs);
-
-
-/*! This function streams a linear_congruential_engine to a \p std::basic_ostream.
- *  \param os The \p basic_ostream to stream out to.
- *  \param e The \p linear_congruential_engine to stream out.
- *  \return \p os
- */
-template<typename UIntType_, UIntType_ a_, UIntType_ c_, UIntType_ m_,
-         typename CharT, typename Traits>
-std::basic_ostream<CharT,Traits>&
-operator<<(std::basic_ostream<CharT,Traits> &os,
-           const linear_congruential_engine<UIntType_,a_,c_,m_> &e);
-
-
-/*! This function streams a linear_congruential_engine in from a std::basic_istream.
- *  \param is The \p basic_istream to stream from.
- *  \param e The \p linear_congruential_engine to stream in.
- *  \return \p is
- */
-template<typename UIntType_, UIntType_ a_, UIntType_ c_, UIntType_ m_,
-         typename CharT, typename Traits>
-std::basic_istream<CharT,Traits>&
-operator>>(std::basic_istream<CharT,Traits> &is,
-           linear_congruential_engine<UIntType_,a_,c_,m_> &e);
-
-
-/*! \} // random_number_engine_templates
- */
-
-
-/*! \addtogroup predefined_random
- *  \{
- */
-
-// XXX the type N2111 used here was uint_fast32_t
-
-/*! \typedef minstd_rand0
- *  \brief A random number engine with predefined parameters which implements a version of
- *         the Minimal Standard random number generation algorithm.
- *  \note The 10000th consecutive invocation of a default-constructed object of type \p minstd_rand0
- *        shall produce the value \c 1043618065 .
- */
-typedef linear_congruential_engine<thrust::detail::uint32_t, 16807, 0, 2147483647> minstd_rand0;
-
-
-/*! \typedef minstd_rand
- *  \brief A random number engine with predefined parameters which implements a version of
- *         the Minimal Standard random number generation algorithm.
- *  \note The 10000th consecutive invocation of a default-constructed object of type \p minstd_rand
- *        shall produce the value \c 399268537 .
- */
-typedef linear_congruential_engine<thrust::detail::uint32_t, 48271, 0, 2147483647> minstd_rand;
-
-/*! \} // predefined_random
- */
-  
-} // end random
-
-// import names into thrust::
-using random::linear_congruential_engine;
-using random::minstd_rand;
-using random::minstd_rand0;
-
-} // end thrust
-
-#include <thrust/random/detail/linear_congruential_engine.inl>
-
diff --git a/compat/thrust/random/linear_feedback_shift_engine.h b/compat/thrust/random/linear_feedback_shift_engine.h
deleted file mode 100644
index f5646c9483..0000000000
--- a/compat/thrust/random/linear_feedback_shift_engine.h
+++ /dev/null
@@ -1,230 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-/*! \file linear_feedback_shift_engine.h
- *  \brief A linear feedback shift pseudorandom number generator.
- */
-
-/*
- * Copyright Jens Maurer 2002
- *
- * Distributed under the Boost Software License, Version 1.0.
- * (See accompanying NOTICE file for the complete license)
- *
- * For more information, see http://www.boost.org
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/random/detail/linear_feedback_shift_engine_wordmask.h>
-#include <iostream>
-#include <cstddef> // for size_t
-#include <thrust/random/detail/random_core_access.h>
-
-namespace thrust
-{
-
-
-namespace random
-{
-
-/*! \addtogroup random_number_engine_templates
- *  \{
- */
-
-/*! \class linear_feedback_shift_engine
- *  \brief A \p linear_feedback_shift_engine random number engine produces
- *         unsigned integer random values using a linear feedback shift random number
- *         generation algorithm.
- *
- *  \tparam UIntType The type of unsigned integer to produce.
- *  \tparam w The word size of the produced values (<tt>w <= sizeof(UIntType)</tt>).
- *  \tparam k The k parameter of Tausworthe's 1965 algorithm.
- *  \tparam q The q exponent of Tausworthe's 1965 algorithm.
- *  \tparam s The step size of Tausworthe's 1965 algorithm.
- *
- *  \note linear_feedback_shift_engine is based on the Boost Template Library's linear_feedback_shift.
- */
-template<typename UIntType, size_t w, size_t k, size_t q, size_t s>
-  class linear_feedback_shift_engine
-{
-  public:
-    // types
-
-    /*! \typedef result_type
-     *  \brief The type of the unsigned integer produced by this \p linear_feedback_shift_engine.
-     */
-    typedef UIntType result_type;
-
-    // engine characteristics
-
-    /*! The word size of the produced values.
-     */
-    static const size_t word_size = w;
-
-    /*! A constant used in the generation algorithm.
-     */
-    static const size_t exponent1 = k;
-
-    /*! A constant used in the generation algorithm.
-     */
-    static const size_t exponent2 = q;
-
-    /*! The step size used in the generation algorithm.
-     */
-    static const size_t step_size = s;
-
-    /*! \cond
-     */
-  private:
-    static const result_type wordmask =
-      detail::linear_feedback_shift_engine_wordmask<
-        result_type,
-        w
-      >::value;
-    /*! \endcond
-     */
-
-  public:
-
-    /*! The smallest value this \p linear_feedback_shift_engine may potentially produce.
-     */
-    static const result_type min = 0;
-
-    /*! The largest value this \p linear_feedback_shift_engine may potentially produce.
-     */
-    static const result_type max = wordmask;
-
-    /*! The default seed of this \p linear_feedback_shift_engine.
-     */
-    static const result_type default_seed = 341u;
-
-    // constructors and seeding functions
-
-    /*! This constructor, which optionally accepts a seed, initializes a new
-     *  \p linear_feedback_shift_engine.
-     *  
-     *  \param value The seed used to intialize this \p linear_feedback_shift_engine's state.
-     */
-    __host__ __device__
-    explicit linear_feedback_shift_engine(result_type value = default_seed);
-
-    /*! This method initializes this \p linear_feedback_shift_engine's state, and optionally accepts
-     *  a seed value.
-     *
-     *  \param value The seed used to initializes this \p linear_feedback_shift_engine's state.
-     */
-    __host__ __device__
-    void seed(result_type value = default_seed);
-
-    // generating functions
-    
-    /*! This member function produces a new random value and updates this \p linear_feedback_shift_engine's state.
-     *  \return A new random number.
-     */
-    __host__ __device__
-    result_type operator()(void);
-
-    /*! This member function advances this \p linear_feedback_shift_engine's state a given number of times
-     *  and discards the results.
-     *
-     *  \param z The number of random values to discard.
-     *  \note This function is provided because an implementation may be able to accelerate it.
-     */
-    __host__ __device__
-    void discard(unsigned long long z);
-
-    /*! \cond
-     */
-  private:
-    result_type m_value;
-
-    friend struct thrust::random::detail::random_core_access;
-
-    __host__ __device__
-    bool equal(const linear_feedback_shift_engine &rhs) const;
-
-    template<typename CharT, typename Traits>
-    std::basic_ostream<CharT,Traits>& stream_out(std::basic_ostream<CharT,Traits> &os) const;
-
-    template<typename CharT, typename Traits>
-    std::basic_istream<CharT,Traits>& stream_in(std::basic_istream<CharT,Traits> &is);
-
-    /*! \endcond
-     */
-}; // end linear_feedback_shift_engine
-
-
-/*! This function checks two \p linear_feedback_shift_engines for equality.
- *  \param lhs The first \p linear_feedback_shift_engine to test.
- *  \param rhs The second \p linear_feedback_shift_engine to test.
- *  \return \c true if \p lhs is equal to \p rhs; \c false, otherwise.
- */
-template<typename UIntType_, size_t w_, size_t k_, size_t q_, size_t s_>
-__host__ __device__
-bool operator==(const linear_feedback_shift_engine<UIntType_,w_,k_,q_,s_> &lhs,
-                const linear_feedback_shift_engine<UIntType_,w_,k_,q_,s_> &rhs);
-
-
-/*! This function checks two \p linear_feedback_shift_engines for inequality.
- *  \param lhs The first \p linear_feedback_shift_engine to test.
- *  \param rhs The second \p linear_feedback_shift_engine to test.
- *  \return \c true if \p lhs is not equal to \p rhs; \c false, otherwise.
- */
-template<typename UIntType_, size_t w_, size_t k_, size_t q_, size_t s_>
-__host__ __device__
-bool operator!=(const linear_feedback_shift_engine<UIntType_,w_,k_,q_,s_> &lhs,
-                const linear_feedback_shift_engine<UIntType_,w_,k_,q_,s_> &rhs);
-
-
-/*! This function streams a linear_feedback_shift_engine to a \p std::basic_ostream.
- *  \param os The \p basic_ostream to stream out to.
- *  \param e The \p linear_feedback_shift_engine to stream out.
- *  \return \p os
- */
-template<typename UIntType_, size_t w_, size_t k_, size_t q_, size_t s_,
-         typename CharT, typename Traits>
-std::basic_ostream<CharT,Traits>&
-operator<<(std::basic_ostream<CharT,Traits> &os,
-           const linear_feedback_shift_engine<UIntType_,w_,k_,q_,s_> &e);
-
-
-/*! This function streams a linear_feedback_shift_engine in from a std::basic_istream.
- *  \param is The \p basic_istream to stream from.
- *  \param e The \p linear_feedback_shift_engine to stream in.
- *  \return \p is
- */
-template<typename UIntType_, size_t w_, size_t k_, size_t q_, size_t s_,
-         typename CharT, typename Traits>
-std::basic_istream<CharT,Traits>&
-operator>>(std::basic_istream<CharT,Traits> &is,
-           linear_feedback_shift_engine<UIntType_,w_,k_,q_,s_> &e);
-
-
-/*! \} // end random_number_engine_templates
- */
-
-
-} // end random
-
-// import names into thrust::
-using random::linear_feedback_shift_engine;
-
-} // end thrust
-
-#include <thrust/random/detail/linear_feedback_shift_engine.inl>
-
diff --git a/compat/thrust/random/normal_distribution.h b/compat/thrust/random/normal_distribution.h
deleted file mode 100644
index 5543f30a5f..0000000000
--- a/compat/thrust/random/normal_distribution.h
+++ /dev/null
@@ -1,275 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file normal_distribution.h
- *  \brief A normal (Gaussian) distribution of real-valued numbers.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/pair.h>
-#include <thrust/random/detail/random_core_access.h>
-#include <thrust/random/detail/normal_distribution_base.h>
-#include <iostream>
-
-namespace thrust
-{
-
-namespace random
-{
-
-
-/*! \addtogroup random_number_distributions
- *  \{
- */
-
-/*! \class normal_distribution
- *  \brief A \p normal_distribution random number distribution produces floating point
- *         Normally distributed random numbers.
- *
- *  \tparam RealType The type of floating point number to produce.
- *
- *  The following code snippet demonstrates examples of using a \p normal_distribution with a 
- *  random number engine to produce random values drawn from the Normal distribution with a given
- *  mean and variance:
- *
- *  \code
- *  #include <thrust/random/linear_congruential_engine.h>
- *  #include <thrust/random/normal_distribution.h>
- *
- *  int main(void)
- *  {
- *    // create a minstd_rand object to act as our source of randomness
- *    thrust::minstd_rand rng;
- *
- *    // create a normal_distribution to produce floats from the Normal distribution
- *    // with mean 2.0 and standard deviation 3.5
- *    thrust::random::normal_distribution<float> dist(2.0f, 3.5f);
- *
- *    // write a random number to standard output
- *    std::cout << dist(rng) << std::endl;
- *
- *    // write the mean of the distribution, just in case we forgot
- *    std::cout << dist.mean() << std::endl;
- *
- *    // 2.0 is printed
- *
- *    // and the standard deviation
- *    std::cout << dist.stddev() << std::endl;
- *
- *    // 3.5 is printed
- *
- *    return 0;
- *  }
- *  \endcode
- */
-template<typename RealType = double>
-  class normal_distribution
-    : public detail::normal_distribution_base<RealType>::type
-{
-  private:
-    typedef typename detail::normal_distribution_base<RealType>::type super_t;
-
-  public:
-    // types
-    
-    /*! \typedef result_type
-     *  \brief The type of the floating point number produced by this \p normal_distribution.
-     */
-    typedef RealType result_type;
-
-    /*! \typedef param_type
-     *  \brief The type of the object encapsulating this \p normal_distribution's parameters.
-     */
-    typedef thrust::pair<RealType,RealType> param_type;
-
-    // constructors and reset functions
-    
-    /*! This constructor creates a new \p normal_distribution from two values defining the
-     *  half-open interval of the distribution.
-     *  
-     *  \param mean The mean (expected value) of the distribution. Defaults to \c 0.0.
-     *  \param stddev The standard deviation of the distribution. Defaults to \c 1.0.
-     */
-    __host__ __device__
-    explicit normal_distribution(RealType mean = 0.0, RealType stddev = 1.0);
-
-    /*! This constructor creates a new \p normal_distribution from a \p param_type object
-     *  encapsulating the range of the distribution.
-     *  
-     *  \param parm A \p param_type object encapsulating the parameters (i.e., the mean and standard deviation) of the distribution.
-     */
-    __host__ __device__
-    explicit normal_distribution(const param_type &parm);
-
-    /*! Calling this member function guarantees that subsequent uses of this
-     *  \p normal_distribution do not depend on values produced by any random
-     *  number generator prior to invoking this function.
-     */
-    __host__ __device__
-    void reset(void);
-
-    // generating functions
-
-    /*! This method produces a new Normal random integer drawn from this \p normal_distribution's
-     *  range using a \p UniformRandomNumberGenerator as a source of randomness.
-     *
-     *  \param urng The \p UniformRandomNumberGenerator to use as a source of randomness.
-     */
-    template<typename UniformRandomNumberGenerator>
-    __host__ __device__
-    result_type operator()(UniformRandomNumberGenerator &urng);
-
-    /*! This method produces a new Normal random integer as if by creating a new \p normal_distribution 
-     *  from the given \p param_type object, and calling its <tt>operator()</tt> method with the given
-     *  \p UniformRandomNumberGenerator as a source of randomness.
-     *
-     *  \param urng The \p UniformRandomNumberGenerator to use as a source of randomness.
-     *  \param parm A \p param_type object encapsulating the parameters of the \p normal_distribution
-     *              to draw from.
-     */
-    template<typename UniformRandomNumberGenerator>
-    __host__ __device__
-    result_type operator()(UniformRandomNumberGenerator &urng, const param_type &parm);
-
-    // property functions
-
-    /*! This method returns the value of the parameter with which this \p normal_distribution
-     *  was constructed.
-     *
-     *  \return The mean (expected value) of this \p normal_distribution's output.
-     */
-    __host__ __device__
-    result_type mean(void) const;
-
-    /*! This method returns the value of the parameter with which this \p normal_distribution
-     *  was constructed.
-     *
-     *  \return The standard deviation of this \p uniform_real_distribution's output.
-     */
-    __host__ __device__
-    result_type stddev(void) const;
-
-    /*! This method returns a \p param_type object encapsulating the parameters with which this
-     *  \p normal_distribution was constructed.
-     *
-     *  \return A \p param_type object encapsulating the parameters (i.e., the mean and standard deviation) of this \p normal_distribution.
-     */
-    __host__ __device__
-    param_type param(void) const;
-
-    /*! This method changes the parameters of this \p normal_distribution using the values encapsulated
-     *  in a given \p param_type object.
-     *
-     *  \param parm A \p param_type object encapsulating the new parameters (i.e., the mean and variance) of this \p normal_distribution.
-     */
-    __host__ __device__
-    void param(const param_type &parm);
-
-    /*! This method returns the smallest floating point number this \p normal_distribution can potentially produce.
-     *
-     *  \return The lower bound of this \p normal_distribution's half-open interval.
-     */
-    __host__ __device__
-    result_type min THRUST_PREVENT_MACRO_SUBSTITUTION (void) const;
-
-    /*! This method returns the smallest number larger than largest floating point number this \p uniform_real_distribution can potentially produce.
-     *
-     *  \return The upper bound of this \p normal_distribution's half-open interval.
-     */
-    __host__ __device__
-    result_type max THRUST_PREVENT_MACRO_SUBSTITUTION (void) const;
-
-    /*! \cond
-     */
-  private:
-    param_type m_param;
-
-    friend struct thrust::random::detail::random_core_access;
-
-    __host__ __device__
-    bool equal(const normal_distribution &rhs) const;
-
-    template<typename CharT, typename Traits>
-    std::basic_ostream<CharT,Traits>& stream_out(std::basic_ostream<CharT,Traits> &os) const;
-
-    template<typename CharT, typename Traits>
-    std::basic_istream<CharT,Traits>& stream_in(std::basic_istream<CharT,Traits> &is);
-    /*! \endcond
-     */
-}; // end normal_distribution
-
-
-/*! This function checks two \p normal_distributions for equality.
- *  \param lhs The first \p normal_distribution to test.
- *  \param rhs The second \p normal_distribution to test.
- *  \return \c true if \p lhs is equal to \p rhs; \c false, otherwise.
- */
-template<typename RealType>
-__host__ __device__
-bool operator==(const normal_distribution<RealType> &lhs,
-                const normal_distribution<RealType> &rhs);
-
-
-/*! This function checks two \p normal_distributions for inequality.
- *  \param lhs The first \p normal_distribution to test.
- *  \param rhs The second \p normal_distribution to test.
- *  \return \c true if \p lhs is not equal to \p rhs; \c false, otherwise.
- */
-template<typename RealType>
-__host__ __device__
-bool operator!=(const normal_distribution<RealType> &lhs,
-                const normal_distribution<RealType> &rhs);
-
-
-/*! This function streams a normal_distribution to a \p std::basic_ostream.
- *  \param os The \p basic_ostream to stream out to.
- *  \param d The \p normal_distribution to stream out.
- *  \return \p os
- */
-template<typename RealType,
-         typename CharT, typename Traits>
-std::basic_ostream<CharT,Traits>&
-operator<<(std::basic_ostream<CharT,Traits> &os,
-           const normal_distribution<RealType> &d);
-
-
-/*! This function streams a normal_distribution in from a std::basic_istream.
- *  \param is The \p basic_istream to stream from.
- *  \param d The \p normal_distribution to stream in.
- *  \return \p is
- */
-template<typename RealType,
-         typename CharT, typename Traits>
-std::basic_istream<CharT,Traits>&
-operator>>(std::basic_istream<CharT,Traits> &is,
-           normal_distribution<RealType> &d);
-
-
-/*! \} // end random_number_distributions
- */
-
-
-} // end random
-
-using random::normal_distribution;
-
-} // end thrust
-
-#include <thrust/random/detail/normal_distribution.inl>
-
diff --git a/compat/thrust/random/subtract_with_carry_engine.h b/compat/thrust/random/subtract_with_carry_engine.h
deleted file mode 100644
index b88810097b..0000000000
--- a/compat/thrust/random/subtract_with_carry_engine.h
+++ /dev/null
@@ -1,256 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-/*! \file subtract_with_carry_engine.h
- *  \brief A subtract-with-carry pseudorandom number generator
- *         based on Marsaglia & Zaman.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/random/detail/random_core_access.h>
-
-#include <thrust/detail/cstdint.h>
-#include <cstddef> // for size_t
-#include <iostream>
-
-namespace thrust
-{
-
-namespace random
-{
-
-
-/*! \addtogroup random_number_engine_templates
- *  \{
- */
-
-/*! \class subtract_with_carry_engine
- *  \brief A \p subtract_with_carry_engine random number engine produces unsigned
- *         integer random numbers using the subtract with carry algorithm of Marsaglia & Zaman.
- *
- *         The generation algorithm is performed as follows:
- *         -# Let <tt>Y = X_{i-s}- X_{i-r} - c</tt>.
- *         -# Set <tt>X_i</tt> to <tt>y = Y mod m</tt>. Set \c c to \c 1 if <tt>Y < 0</tt>, otherwise set \c c to \c 0.
- *
- *         This algorithm corresponds to a modular linear function of the form
- *
- *         <tt>TA(x_i) = (a * x_i) mod b</tt>, where \c b is of the form <tt>m^r - m^s + 1</tt> and
- *         <tt>a = b - (b-1)/m</tt>.
- *
- *  \tparam UIntType The type of unsigned integer to produce.
- *  \tparam w The word size, in bits, of the produced values (at most the number of bits in \c UIntType).
- *  \tparam s The short lag of the generation algorithm.
- *  \tparam r The long lag of the generation algorithm.
- *
- *  \note Inexperienced users should not use this class template directly.  Instead, use
- *  \p ranlux24_base or \p ranlux48_base, which are instances of \p subtract_with_carry_engine.
- *
- *  \see thrust::random::ranlux24_base
- *  \see thrust::random::ranlux48_base
- */
-template<typename UIntType, size_t w, size_t s, size_t r>
-  class subtract_with_carry_engine
-{
-    /*! \cond
-     */
-  private:
-    static const UIntType modulus = UIntType(1) << w;
-    /*! \endcond
-     */
-
-  public:
-    // types
-    
-    /*! \typedef result_type
-     *  \brief The type of the unsigned integer produced by this \p subtract_with_carry_engine.
-     */
-    typedef UIntType result_type;
-
-    // engine characteristics
-
-    /*! The word size of the produced values.
-     */
-    static const size_t word_size = w;
-
-    /*! The size of the short lag used in the generation algorithm.
-     */
-    static const size_t short_lag = s;
-
-    /*! The size of the long lag used in the generation algorithm.
-     */
-    static const size_t long_lag = r;
-
-    /*! The smallest value this \p subtract_with_carry_engine may potentially produce.
-     */
-    static const result_type min = 0;
-
-    /*! The largest value this \p subtract_with_carry_engine may potentially produce.
-     */
-    static const result_type max = modulus - 1;
-
-    /*! The default seed of this \p subtract_with_carry_engine.
-     */
-    static const result_type default_seed = 19780503u;
-
-    // constructors and seeding functions
-
-    /*! This constructor, which optionally accepts a seed, initializes a new
-     *  \p subtract_with_carry_engine.
-     *  
-     *  \param value The seed used to initialize this \p subtract_with_carry_engine's state.
-     */
-    __host__ __device__
-    explicit subtract_with_carry_engine(result_type value = default_seed);
-
-    /*! This method initializes this \p subtract_with_carry_engine's state, and optionally accepts
-     *  a seed value.
-     *
-     *  \param value The seed used to initialize this \p subtract_with_carry_engine's state.
-     */
-    __host__ __device__
-    void seed(result_type value = default_seed);
-
-    // generating functions
-    
-    /*! This member function produces a new random value and updates this \p subtract_with_carry_engine's state.
-     *  \return A new random number.
-     */
-    __host__ __device__
-    result_type operator()(void);
-
-    /*! This member function advances this \p subtract_with_carry_engine's state a given number of times
-     *  and discards the results.
-     *
-     *  \param z The number of random values to discard.
-     *  \note This function is provided because an implementation may be able to accelerate it.
-     */
-    __host__ __device__
-    void discard(unsigned long long z);
-
-    /*! \cond
-     */
-  private:
-    result_type m_x[long_lag];
-    unsigned int m_k;
-    int m_carry;
-
-    friend struct thrust::random::detail::random_core_access;
-
-    __host__ __device__
-    bool equal(const subtract_with_carry_engine &rhs) const;
-
-    template<typename CharT, typename Traits>
-    std::basic_ostream<CharT,Traits>& stream_out(std::basic_ostream<CharT,Traits> &os) const;
-
-    template<typename CharT, typename Traits>
-    std::basic_istream<CharT,Traits>& stream_in(std::basic_istream<CharT,Traits> &is);
-
-    /*! \endcond
-     */
-}; // end subtract_with_carry_engine
-
-
-/*! This function checks two \p subtract_with_carry_engines for equality.
- *  \param lhs The first \p subtract_with_carry_engine to test.
- *  \param rhs The second \p subtract_with_carry_engine to test.
- *  \return \c true if \p lhs is equal to \p rhs; \c false, otherwise.
- */
-template<typename UIntType_, size_t w_, size_t s_, size_t r_>
-__host__ __device__
-bool operator==(const subtract_with_carry_engine<UIntType_,w_,s_,r_> &lhs,
-                const subtract_with_carry_engine<UIntType_,w_,s_,r_> &rhs);
-
-
-/*! This function checks two \p subtract_with_carry_engines for inequality.
- *  \param lhs The first \p subtract_with_carry_engine to test.
- *  \param rhs The second \p subtract_with_carry_engine to test.
- *  \return \c true if \p lhs is not equal to \p rhs; \c false, otherwise.
- */
-template<typename UIntType_, size_t w_, size_t s_, size_t r_>
-__host__ __device__
-bool operator!=(const subtract_with_carry_engine<UIntType_,w_,s_,r_>&lhs,
-                const subtract_with_carry_engine<UIntType_,w_,s_,r_>&rhs);
-
-
-/*! This function streams a subtract_with_carry_engine to a \p std::basic_ostream.
- *  \param os The \p basic_ostream to stream out to.
- *  \param e The \p subtract_with_carry_engine to stream out.
- *  \return \p os
- */
-template<typename UIntType_, size_t w_, size_t s_, size_t r_,
-         typename CharT, typename Traits>
-std::basic_ostream<CharT,Traits>&
-operator<<(std::basic_ostream<CharT,Traits> &os,
-           const subtract_with_carry_engine<UIntType_,w_,s_,r_> &e);
-
-
-/*! This function streams a subtract_with_carry_engine in from a std::basic_istream.
- *  \param is The \p basic_istream to stream from.
- *  \param e The \p subtract_with_carry_engine to stream in.
- *  \return \p is
- */
-template<typename UIntType_, size_t w_, size_t s_, size_t r_,
-         typename CharT, typename Traits>
-std::basic_istream<CharT,Traits>&
-operator>>(std::basic_istream<CharT,Traits> &is,
-           subtract_with_carry_engine<UIntType_,w_,s_,r_> &e);
-
-
-/*! \} // end random_number_engine_templates
- */
-
-
-/*! \addtogroup predefined_random
- *  \{
- */
-
-// XXX N2111 uses uint_fast32_t here
-
-/*! \typedef ranlux24_base
- *  \brief A random number engine with predefined parameters which implements the
- *         base engine of the \p ranlux24 random number engine.
- *  \note The 10000th consecutive invocation of a default-constructed object of type \p ranlux24_base
- *        shall produce the value \c 7937952 .
- */
-typedef subtract_with_carry_engine<thrust::detail::uint32_t, 24, 10, 24> ranlux24_base;
-
-
-// XXX N2111 uses uint_fast64_t here
-
-/*! \typedef ranlux48_base
- *  \brief A random number engine with predefined parameters which implements the
- *         base engine of the \p ranlux48 random number engine.
- *  \note The 10000th consecutive invocation of a default-constructed object of type \p ranlux48_base
- *        shall produce the value \c 192113843633948 .
- */
-typedef subtract_with_carry_engine<thrust::detail::uint64_t, 48,  5, 12> ranlux48_base;
-
-/*! \} // end predefined_random
- */
-
-} // end random
-
-// import names into thrust::
-using random::subtract_with_carry_engine;
-using random::ranlux24_base;
-using random::ranlux48_base;
-
-} // end thrust
-
-#include <thrust/random/detail/subtract_with_carry_engine.inl>
-
diff --git a/compat/thrust/random/uniform_int_distribution.h b/compat/thrust/random/uniform_int_distribution.h
deleted file mode 100644
index d05f7faaf3..0000000000
--- a/compat/thrust/random/uniform_int_distribution.h
+++ /dev/null
@@ -1,276 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file uniform_int_distribution.h
- *  \brief A uniform distribution of integer-valued numbers
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/pair.h>
-#include <thrust/detail/integer_traits.h>
-#include <thrust/random/detail/random_core_access.h>
-#include <iostream>
-
-namespace thrust
-{
-
-namespace random
-{
-
-/*! \addtogroup random_number_distributions Random Number Distributions Class Templates
- *  \ingroup random
- *  \{
- */
-
-/*! \class uniform_int_distribution
- *  \brief A \p uniform_int_distribution random number distribution produces signed or unsigned integer
- *         uniform random numbers from a given range.
- *
- *  \tparam IntType The type of integer to produce.
- *
- *  The following code snippet demonstrates examples of using a \p uniform_int_distribution with a 
- *  random number engine to produce random integers drawn from a given range:
- *
- *  \code
- *  #include <thrust/random/linear_congruential_engine.h>
- *  #include <thrust/random/uniform_int_distribution.h>
- *
- *  int main(void)
- *  {
- *    // create a minstd_rand object to act as our source of randomness
- *    thrust::minstd_rand rng;
- *
- *    // create a uniform_int_distribution to produce ints from [-7,13]
- *    thrust::uniform_int_distribution<int> dist(-7,13);
- *
- *    // write a random number from the range [-7,13] to standard output
- *    std::cout << dist(rng) << std::endl;
- *
- *    // write the range of the distribution, just in case we forgot
- *    std::cout << dist.min() << std::endl;
- *
- *    // -7 is printed
- *
- *    std::cout << dist.max() << std::endl;
- *
- *    // 13 is printed
- *
- *    // write the parameters of the distribution (which happen to be the bounds) to standard output
- *    std::cout << dist.a() << std::endl;
- *
- *    // -7 is printed
- *
- *    std::cout << dist.b() << std::endl;
- *
- *    // 13 is printed
- *
- *    return 0;
- *  }
- *  \endcode
- */
-template<typename IntType = int>
-  class uniform_int_distribution
-{
-  public:
-    // types
-
-    /*! \typedef result_type
-     *  \brief The type of the integer produced by this \p uniform_int_distribution.
-     */
-    typedef IntType result_type;
-
-    /*! \typedef param_type
-     *  \brief The type of the object encapsulating this \p uniform_int_distribution's parameters.
-     */
-    typedef thrust::pair<IntType,IntType> param_type;
-
-    // constructors and reset functions
-
-    /*! This constructor creates a new \p uniform_int_distribution from two values defining the
-     *  range of the distribution.
-     *  
-     *  \param a The smallest integer to potentially produce. Defaults to \c 0.
-     *  \param b The largest integer to potentially produce. Defaults to the largest representable integer in
-     *           the platform.
-     */
-    __host__ __device__
-    explicit uniform_int_distribution(IntType a = 0, IntType b = thrust::detail::integer_traits<IntType>::const_max);
-
-    /*! This constructor creates a new \p uniform_int_distribution from a \p param_type object
-     *  encapsulating the range of the distribution.
-     *  
-     *  \param parm A \p param_type object encapsulating the parameters (i.e., the range) of the distribution.
-     */
-    __host__ __device__
-    explicit uniform_int_distribution(const param_type &parm);
-
-    /*! This does nothing.  It is included to conform to the requirements of the RandomDistribution concept.
-     */
-    __host__ __device__
-    void reset(void);
-
-    // generating functions
-
-    /*! This method produces a new uniform random integer drawn from this \p uniform_int_distribution's
-     *  range using a \p UniformRandomNumberGenerator as a source of randomness.
-     *
-     *  \param urng The \p UniformRandomNumberGenerator to use as a source of randomness.
-     */
-    template<typename UniformRandomNumberGenerator>
-    __host__ __device__
-    result_type operator()(UniformRandomNumberGenerator &urng);
-
-    /*! This method produces a new uniform random integer as if by creating a new \p uniform_int_distribution 
-     *  from the given \p param_type object, and calling its <tt>operator()</tt> method with the given
-     *  \p UniformRandomNumberGenerator as a source of randomness.
-     *
-     *  \param urng The \p UniformRandomNumberGenerator to use as a source of randomness.
-     *  \param parm A \p param_type object encapsulating the parameters of the \p uniform_int_distribution
-     *              to draw from.
-     */
-    template<typename UniformRandomNumberGenerator>
-    __host__ __device__
-    result_type operator()(UniformRandomNumberGenerator &urng, const param_type &parm);
-
-    // property functions
-    
-    /*! This method returns the value of the parameter with which this \p uniform_int_distribution
-     *  was constructed.
-     *
-     *  \return The lower bound of this \p uniform_int_distribution's range.
-     */
-    __host__ __device__
-    result_type a(void) const;
-
-    /*! This method returns the value of the parameter with which this \p uniform_int_distribution
-     *  was constructed.
-     *
-     *  \return The upper bound of this \p uniform_int_distribution's range.
-     */
-    __host__ __device__
-    result_type b(void) const;
-
-    /*! This method returns a \p param_type object encapsulating the parameters with which this
-     *  \p uniform_int_distribution was constructed.
-     *
-     *  \return A \p param_type object encapsulating the range of this \p uniform_int_distribution.
-     */
-    __host__ __device__
-    param_type param(void) const;
-
-    /*! This method changes the parameters of this \p uniform_int_distribution using the values encapsulated
-     *  in a given \p param_type object.
-     *
-     *  \param parm A \p param_type object encapsulating the new range of this \p uniform_int_distribution.
-     */
-    __host__ __device__
-    void param(const param_type &parm);
-
-    /*! This method returns the smallest integer this \p uniform_int_distribution can potentially produce.
-     *
-     *  \return The lower bound of this \p uniform_int_distribution's range.
-     */
-    __host__ __device__
-    result_type min THRUST_PREVENT_MACRO_SUBSTITUTION (void) const;
-
-    /*! This method returns the largest integer this \p uniform_int_distribution can potentially produce.
-     *
-     *  \return The upper bound of this \p uniform_int_distribution's range.
-     */
-    __host__ __device__
-    result_type max THRUST_PREVENT_MACRO_SUBSTITUTION (void) const;
-
-    /*! \cond
-     */
-  private:
-    param_type m_param;
-
-    friend struct thrust::random::detail::random_core_access;
-
-    __host__ __device__
-    bool equal(const uniform_int_distribution &rhs) const;
-
-    template<typename CharT, typename Traits>
-    std::basic_ostream<CharT,Traits>& stream_out(std::basic_ostream<CharT,Traits> &os) const;
-
-    template<typename CharT, typename Traits>
-    std::basic_istream<CharT,Traits>& stream_in(std::basic_istream<CharT,Traits> &is);
-    /*! \endcond
-     */
-}; // end uniform_int_distribution
-
-
-/*! This function checks two \p uniform_int_distributions for equality.
- *  \param lhs The first \p uniform_int_distribution to test.
- *  \param rhs The second \p uniform_int_distribution to test.
- *  \return \c true if \p lhs is equal to \p rhs; \c false, otherwise.
- */
-template<typename IntType>
-__host__ __device__
-bool operator==(const uniform_int_distribution<IntType> &lhs,
-                const uniform_int_distribution<IntType> &rhs);
-
-
-/*! This function checks two \p uniform_int_distributions for inequality.
- *  \param lhs The first \p uniform_int_distribution to test.
- *  \param rhs The second \p uniform_int_distribution to test.
- *  \return \c true if \p lhs is not equal to \p rhs; \c false, otherwise.
- */
-template<typename IntType>
-__host__ __device__
-bool operator!=(const uniform_int_distribution<IntType> &lhs,
-                const uniform_int_distribution<IntType> &rhs);
-
-
-/*! This function streams a uniform_int_distribution to a \p std::basic_ostream.
- *  \param os The \p basic_ostream to stream out to.
- *  \param d The \p uniform_int_distribution to stream out.
- *  \return \p os
- */
-template<typename IntType,
-         typename CharT, typename Traits>
-std::basic_ostream<CharT,Traits>&
-operator<<(std::basic_ostream<CharT,Traits> &os,
-           const uniform_int_distribution<IntType> &d);
-
-
-/*! This function streams a uniform_int_distribution in from a std::basic_istream.
- *  \param is The \p basic_istream to stream from.
- *  \param d The \p uniform_int_distribution to stream in.
- *  \return \p is
- */
-template<typename IntType,
-         typename CharT, typename Traits>
-std::basic_istream<CharT,Traits>&
-operator>>(std::basic_istream<CharT,Traits> &is,
-           uniform_int_distribution<IntType> &d);
-
-
-/*! \} // end random_number_distributions
- */
-
-
-} // end random
-
-using random::uniform_int_distribution;
-
-} // end thrust
-
-#include <thrust/random/detail/uniform_int_distribution.inl>
-
diff --git a/compat/thrust/random/uniform_real_distribution.h b/compat/thrust/random/uniform_real_distribution.h
deleted file mode 100644
index ab85ab33dc..0000000000
--- a/compat/thrust/random/uniform_real_distribution.h
+++ /dev/null
@@ -1,274 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file uniform_real_distribution.h
- *  \brief A uniform distribution of real-valued numbers
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/pair.h>
-#include <thrust/random/detail/random_core_access.h>
-#include <iostream>
-
-namespace thrust
-{
-
-namespace random
-{
-
-
-/*! \addtogroup random_number_distributions
- *  \{
- */
-
-/*! \class uniform_real_distribution
- *  \brief A \p uniform_real_distribution random number distribution produces floating point
- *         uniform random numbers from a half-open interval.
- *
- *  \tparam RealType The type of floating point number to produce.
- *
- *  The following code snippet demonstrates examples of using a \p uniform_real_distribution with a 
- *  random number engine to produce random floating point numbers drawn from a given range:
- *
- *  \code
- *  #include <thrust/random/linear_congruential_engine.h>
- *  #include <thrust/random/uniform_real_distribution.h>
- *
- *  int main(void)
- *  {
- *    // create a minstd_rand object to act as our source of randomness
- *    thrust::minstd_rand rng;
- *
- *    // create a uniform_real_distribution to produce floats from [-7,13)
- *    thrust::uniform_real_distribution<float> dist(-7,13);
- *
- *    // write a random number from the range [-7,13) to standard output
- *    std::cout << dist(rng) << std::endl;
- *
- *    // write the range of the distribution, just in case we forgot
- *    std::cout << dist.min() << std::endl;
- *
- *    // -7.0 is printed
- *
- *    std::cout << dist.max() << std::endl;
- *
- *    // 13.0 is printed
- *
- *    // write the parameters of the distribution (which happen to be the bounds) to standard output
- *    std::cout << dist.a() << std::endl;
- *
- *    // -7.0 is printed
- *
- *    std::cout << dist.b() << std::endl;
- *
- *    // 13.0 is printed
- *
- *    return 0;
- *  }
- *  \endcode
- */
-template<typename RealType = double>
-  class uniform_real_distribution
-{
-  public:
-    // types
-    
-    /*! \typedef result_type
-     *  \brief The type of the floating point number produced by this \p uniform_real_distribution.
-     */
-    typedef RealType result_type;
-
-    /*! \typedef param_type
-     *  \brief The type of the object encapsulating this \p uniform_real_distribution's parameters.
-     */
-    typedef thrust::pair<RealType,RealType> param_type;
-
-    // constructors and reset functions
-    
-    /*! This constructor creates a new \p uniform_real_distribution from two values defining the
-     *  half-open interval of the distribution.
-     *  
-     *  \param a The smallest floating point number to potentially produce. Defaults to \c 0.0.
-     *  \param b The smallest number larger than the largest floating point number to potentially produce. Defaults to \c 1.0.
-     */
-    __host__ __device__
-    explicit uniform_real_distribution(RealType a = 0.0, RealType b = 1.0);
-
-    /*! This constructor creates a new \p uniform_real_distribution from a \p param_type object
-     *  encapsulating the range of the distribution.
-     *  
-     *  \param parm A \p param_type object encapsulating the parameters (i.e., the range) of the distribution.
-     */
-    __host__ __device__
-    explicit uniform_real_distribution(const param_type &parm);
-
-    /*! This does nothing.  It is included to conform to the requirements of the RandomDistribution concept.
-     */
-    __host__ __device__
-    void reset(void);
-
-    // generating functions
-
-    /*! This method produces a new uniform random floating point number drawn from this \p uniform_real_distribution's
-     *  range using a \p UniformRandomNumberGenerator as a source of randomness.
-     *
-     *  \param urng The \p UniformRandomNumberGenerator to use as a source of randomness.
-     */
-    template<typename UniformRandomNumberGenerator>
-    __host__ __device__
-    result_type operator()(UniformRandomNumberGenerator &urng);
-
-    /*! This method produces a new uniform random floating point number as if by creating a new \p uniform_real_distribution 
-     *  from the given \p param_type object, and calling its <tt>operator()</tt> method with the given
-     *  \p UniformRandomNumberGenerator as a source of randomness.
-     *
-     *  \param urng The \p UniformRandomNumberGenerator to use as a source of randomness.
-     *  \param parm A \p param_type object encapsulating the parameters of the \p uniform_real_distribution
-     *              to draw from.
-     */
-    template<typename UniformRandomNumberGenerator>
-    __host__ __device__
-    result_type operator()(UniformRandomNumberGenerator &urng, const param_type &parm);
-
-    // property functions
-
-    /*! This method returns the value of the parameter with which this \p uniform_real_distribution
-     *  was constructed.
-     *
-     *  \return The lower bound of this \p uniform_real_distribution's half-open interval.
-     */
-    __host__ __device__
-    result_type a(void) const;
-
-    /*! This method returns the value of the parameter with which this \p uniform_real_distribution
-     *  was constructed.
-     *
-     *  \return The upper bound of this \p uniform_real_distribution's half-open interval.
-     */
-    __host__ __device__
-    result_type b(void) const;
-
-    /*! This method returns a \p param_type object encapsulating the parameters with which this
-     *  \p uniform_real_distribution was constructed.
-     *
-     *  \return A \p param_type object encapsulating the half-open interval of this \p uniform_real_distribution.
-     */
-    __host__ __device__
-    param_type param(void) const;
-
-    /*! This method changes the parameters of this \p uniform_real_distribution using the values encapsulated
-     *  in a given \p param_type object.
-     *
-     *  \param parm A \p param_type object encapsulating the new half-open interval of this \p uniform_real_distribution.
-     */
-    __host__ __device__
-    void param(const param_type &parm);
-
-    /*! This method returns the smallest floating point number this \p uniform_real_distribution can potentially produce.
-     *
-     *  \return The lower bound of this \p uniform_real_distribution's half-open interval.
-     */
-    __host__ __device__
-    result_type min THRUST_PREVENT_MACRO_SUBSTITUTION (void) const;
-
-    /*! This method returns the smallest number larger than the largest floating point number this \p uniform_real_distribution can potentially produce.
-     *
-     *  \return The upper bound of this \p uniform_real_distribution's half-open interval.
-     */
-    __host__ __device__
-    result_type max THRUST_PREVENT_MACRO_SUBSTITUTION (void) const;
-
-    /*! \cond
-     */
-  private:
-    param_type m_param;
-
-    friend struct thrust::random::detail::random_core_access;
-
-    __host__ __device__
-    bool equal(const uniform_real_distribution &rhs) const;
-
-    template<typename CharT, typename Traits>
-    std::basic_ostream<CharT,Traits>& stream_out(std::basic_ostream<CharT,Traits> &os) const;
-
-    template<typename CharT, typename Traits>
-    std::basic_istream<CharT,Traits>& stream_in(std::basic_istream<CharT,Traits> &is);
-    /*! \endcond
-     */
-}; // end uniform_real_distribution
-
-
-/*! This function checks two \p uniform_real_distributions for equality.
- *  \param lhs The first \p uniform_real_distribution to test.
- *  \param rhs The second \p uniform_real_distribution to test.
- *  \return \c true if \p lhs is equal to \p rhs; \c false, otherwise.
- */
-template<typename RealType>
-__host__ __device__
-bool operator==(const uniform_real_distribution<RealType> &lhs,
-                const uniform_real_distribution<RealType> &rhs);
-
-
-/*! This function checks two \p uniform_real_distributions for inequality.
- *  \param lhs The first \p uniform_real_distribution to test.
- *  \param rhs The second \p uniform_real_distribution to test.
- *  \return \c true if \p lhs is not equal to \p rhs; \c false, otherwise.
- */
-template<typename RealType>
-__host__ __device__
-bool operator!=(const uniform_real_distribution<RealType> &lhs,
-                const uniform_real_distribution<RealType> &rhs);
-
-
-/*! This function streams a uniform_real_distribution to a \p std::basic_ostream.
- *  \param os The \p basic_ostream to stream out to.
- *  \param d The \p uniform_real_distribution to stream out.
- *  \return \p os
- */
-template<typename RealType,
-         typename CharT, typename Traits>
-std::basic_ostream<CharT,Traits>&
-operator<<(std::basic_ostream<CharT,Traits> &os,
-           const uniform_real_distribution<RealType> &d);
-
-
-/*! This function streams a uniform_real_distribution in from a std::basic_istream.
- *  \param is The \p basic_istream to stream from.
- *  \param d The \p uniform_real_distribution to stream in.
- *  \return \p is
- */
-template<typename RealType,
-         typename CharT, typename Traits>
-std::basic_istream<CharT,Traits>&
-operator>>(std::basic_istream<CharT,Traits> &is,
-           uniform_real_distribution<RealType> &d);
-
-
-/*! \} // end random_number_distributions
- */
-
-
-} // end random
-
-using random::uniform_real_distribution;
-
-} // end thrust
-
-#include <thrust/random/detail/uniform_real_distribution.inl>
-
diff --git a/compat/thrust/random/xor_combine_engine.h b/compat/thrust/random/xor_combine_engine.h
deleted file mode 100644
index 61eb5a50c2..0000000000
--- a/compat/thrust/random/xor_combine_engine.h
+++ /dev/null
@@ -1,271 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-/*! \file xor_combine_engine.h
- *  \brief A pseudorandom number generator which produces pseudorandom
- *         numbers from two integer base engines by merging their
- *         pseudorandom numbers with bitwise exclusive-or.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/random/detail/xor_combine_engine_max.h>
-#include <thrust/random/detail/random_core_access.h>
-#include <iostream>
-#include <cstddef> // for size_t
-
-namespace thrust
-{
-
-namespace random
-{
-
-/*! \addtogroup random_number_engine_adaptors
- *  \{
- */
-
-/*! \class xor_combine_engine
- *  \brief An \p xor_combine_engine adapts two existing base random number engines and
- *         produces random values by combining the values produced by each.
- *
- *  \tparam Engine1 The type of the first base random number engine to adapt.
- *  \tparam s1 The size of the first shift to use in the generation algorithm.
- *  \tparam Engine2 The type of the second base random number engine to adapt.
- *  \tparam s2 The second of the second shift to use in the generation algorithm. Defaults to \c 0.
- *
- *  The following code snippet shows an example of using an \p xor_combine_engine instance:
- *
- *  \code
- *  #include <thrust/random/linear_congruential_engine.h>
- *  #include <thrust/random/xor_combine_engine.h>
- *  #include <iostream>
- *
- *  int main(void)
- *  {
- *    // create an xor_combine_engine from minstd_rand and minstd_rand0
- *    // use a shift of 0 for each
- *    thrust::xor_combine_engine<thrust::minstd_rand,0,thrust::minstd_rand0,0> rng;
- *
- *    // print a random number to standard output
- *    std::cout << rng() << std::endl;
- *
- *    return 0;
- *  }
- *  \endcode
- */
-template<typename Engine1, size_t s1,
-         typename Engine2, size_t s2=0u>
-  class xor_combine_engine
-{
-  public:
-    // types
-
-    /*! \typedef base1_type
-     *  \brief The type of the first adapted base random number engine.
-     */
-    typedef Engine1 base1_type;
-
-    /*! \typedef base2_type
-     *  \brief The type of the second adapted base random number engine.
-     */
-    typedef Engine2 base2_type;
-
-    /*! \typedef result_type
-     *  \brief The type of the unsigned integer produced by this \p xor_combine_engine.
-     */
-    typedef typename thrust::detail::eval_if<
-      (sizeof(typename base2_type::result_type) > sizeof(typename base1_type::result_type)),
-      thrust::detail::identity_<typename base2_type::result_type>,
-      thrust::detail::identity_<typename base1_type::result_type>
-    >::type result_type;
-    
-    /*! The size of the first shift used in the generation algorithm.
-     */
-    static const size_t shift1 = s1;
-
-    /*! The size of the second shift used in the generation algorithm.
-     */
-    static const size_t shift2 = s2;
-
-    /*! The smallest value this \p xor_combine_engine may potentially produce.
-     */
-    static const result_type min = 0;
-
-    /*! The largest value this \p xor_combine_engine may potentially produce.
-     */
-    static const result_type max =
-      detail::xor_combine_engine_max<
-        Engine1, s1, Engine2, s2, result_type
-      >::value;
-
-    // constructors and seeding functions
-
-    /*! This constructor constructs a new \p xor_combine_engine and constructs
-     *  its adapted engines using their null constructors.
-     */
-    __host__ __device__
-    xor_combine_engine(void);
-
-    /*! This constructor constructs a new \p xor_combine_engine using
-     *  given \p base1_type and \p base2_type engines to initialize its adapted base engines.
-     *
-     *  \param urng1 A \p base1_type to use to initialize this \p xor_combine_engine's
-     *         first adapted base engine.
-     *  \param urng2 A \p base2_type to use to initialize this \p xor_combine_engine's
-     *         first adapted base engine.
-     */
-    __host__ __device__
-    xor_combine_engine(const base1_type &urng1, const base2_type &urng2);
-
-    /*! This constructor initializes a new \p xor_combine_engine with a given seed.
-     *  
-     *  \param s The seed used to intialize this \p xor_combine_engine's adapted base engines.
-     */
-    __host__ __device__
-    xor_combine_engine(result_type s);
-
-    /*! This method initializes the state of this \p xor_combine_engine's adapted base engines
-     *  by using their \p default_seed values.
-     */
-    __host__ __device__
-    void seed(void);
-
-    /*! This method initializes the state of this \p xor_combine_engine's adapted base engines
-     *  by using the given seed.
-     *
-     *  \param s The seed with which to intialize this \p xor_combine_engine's adapted base engines.
-     */
-    __host__ __device__
-    void seed(result_type s);
-
-    // generating functions
-
-    /*! This member function produces a new random value and updates this \p xor_combine_engine's state.
-     *  \return A new random number.
-     */
-    __host__ __device__
-    result_type operator()(void);
-
-    /*! This member function advances this \p xor_combine_engine's state a given number of times
-     *  and discards the results.
-     *
-     *  \param z The number of random values to discard.
-     *  \note This function is provided because an implementation may be able to accelerate it.
-     */
-    __host__ __device__
-    void discard(unsigned long long z);
-
-    // property functions
-
-    /*! This member function returns a const reference to this \p xor_combine_engine's
-     *  first adapted base engine.
-     *
-     *  \return A const reference to the first base engine this \p xor_combine_engine adapts.
-     */
-    __host__ __device__
-    const base1_type &base1(void) const;
-
-    /*! This member function returns a const reference to this \p xor_combine_engine's
-     *  second adapted base engine.
-     *
-     *  \return A const reference to the second base engine this \p xor_combine_engine adapts.
-     */
-    __host__ __device__
-    const base2_type &base2(void) const;
-
-    /*! \cond
-     */
-  private:
-    base1_type m_b1;
-    base2_type m_b2;
-
-    friend struct thrust::random::detail::random_core_access;
-
-    __host__ __device__
-    bool equal(const xor_combine_engine &rhs) const;
-
-    template<typename CharT, typename Traits>
-    std::basic_istream<CharT,Traits>& stream_in(std::basic_istream<CharT,Traits> &is);
-
-    template<typename CharT, typename Traits>
-    std::basic_ostream<CharT,Traits>& stream_out(std::basic_ostream<CharT,Traits> &os) const;
-
-    /*! \endcond
-     */
-}; // end xor_combine_engine
-
-
-/*! This function checks two \p xor_combine_engines for equality.
- *  \param lhs The first \p xor_combine_engine to test.
- *  \param rhs The second \p xor_combine_engine to test.
- *  \return \c true if \p lhs is equal to \p rhs; \c false, otherwise.
- */
-template<typename Engine1_, size_t s1_, typename Engine2_, size_t s2_>
-__host__ __device__
-bool operator==(const xor_combine_engine<Engine1_,s1_,Engine2_,s2_> &lhs,
-                const xor_combine_engine<Engine1_,s1_,Engine2_,s2_> &rhs);
-
-
-/*! This function checks two \p xor_combine_engines for inequality.
- *  \param lhs The first \p xor_combine_engine to test.
- *  \param rhs The second \p xor_combine_engine to test.
- *  \return \c true if \p lhs is not equal to \p rhs; \c false, otherwise.
- */
-template<typename Engine1_, size_t s1_, typename Engine2_, size_t s2_>
-__host__ __device__
-bool operator!=(const xor_combine_engine<Engine1_,s1_,Engine2_,s2_> &lhs,
-                const xor_combine_engine<Engine1_,s1_,Engine2_,s2_> &rhs);
-
-
-/*! This function streams a xor_combine_engine to a \p std::basic_ostream.
- *  \param os The \p basic_ostream to stream out to.
- *  \param e The \p xor_combine_engine to stream out.
- *  \return \p os
- */
-template<typename Engine1_, size_t s1_, typename Engine2_, size_t s2_,
-         typename CharT, typename Traits>
-std::basic_ostream<CharT,Traits>&
-operator<<(std::basic_ostream<CharT,Traits> &os,
-           const xor_combine_engine<Engine1_,s1_,Engine2_,s2_> &e);
-
-
-/*! This function streams a xor_combine_engine in from a std::basic_istream.
- *  \param is The \p basic_istream to stream from.
- *  \param e The \p xor_combine_engine to stream in.
- *  \return \p is
- */
-template<typename Engine1_, size_t s1_, typename Engine2_, size_t s2_,
-         typename CharT, typename Traits>
-std::basic_istream<CharT,Traits>&
-operator>>(std::basic_istream<CharT,Traits> &is,
-           xor_combine_engine<Engine1_,s1_,Engine2_,s2_> &e);
-
-
-/*! \} // end random_number_engine_adaptors
- */
-
-
-} // end random
-
-// import names into thrust::
-using random::xor_combine_engine;
-
-} // end thrust
-
-#include <thrust/random/detail/xor_combine_engine.inl>
-
diff --git a/compat/thrust/reduce.h b/compat/thrust/reduce.h
deleted file mode 100644
index 1dc931f9a1..0000000000
--- a/compat/thrust/reduce.h
+++ /dev/null
@@ -1,779 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file reduce.h
- *  \brief Functions for reducing a range to a single value
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/execution_policy.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/pair.h>
-
-namespace thrust
-{
-
-
-/*! \addtogroup reductions
- *  \{
- */
-
-
-/*! \p reduce is a generalization of summation: it computes the sum (or some
- *  other binary operation) of all the elements in the range <tt>[first,
- *  last)</tt>. This version of \p reduce uses \c 0 as the initial value of the
- *  reduction. \p reduce is similar to the C++ Standard Template Library's
- *  <tt>std::accumulate</tt>. The primary difference between the two functions
- *  is that <tt>std::accumulate</tt> guarantees the order of summation, while
- *  \p reduce requires associativity of the binary operation to parallelize
- *  the reduction.
- *
- *  Note that \p reduce also assumes that the binary reduction operator (in this
- *  case operator+) is commutative.  If the reduction operator is not commutative
- *  then \p thrust::reduce should not be used.  Instead, one could use 
- *  \p inclusive_scan (which does not require commutativity) and select the
- *  last element of the output array.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the sequence.
- *  \param last The end of the sequence.
- *  \return The result of the reduction.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *          and if \c x and \c y are objects of \p InputIterator's \c value_type,
- *          then <tt>x + y</tt> is defined and is convertible to \p InputIterator's
- *          \c value_type. If \c T is \c InputIterator's \c value_type, then
- *          <tt>T(0)</tt> is defined.
- *
- *  The following code snippet demonstrates how to use \p reduce to compute
- *  the sum of a sequence of integers using the \p thrust::host execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/reduce.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  int data[6] = {1, 0, 2, 2, 1, 3};
- *  int result = thrust::reduce(thrust::host, data, data + 6);
- *
- *  // result == 9
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/accumulate.html
- */
-template<typename DerivedPolicy, typename InputIterator>
-  typename thrust::iterator_traits<InputIterator>::value_type
-    reduce(const thrust::detail::execution_policy_base<DerivedPolicy> &exec, InputIterator first, InputIterator last);
-
-
-/*! \p reduce is a generalization of summation: it computes the sum (or some
- *  other binary operation) of all the elements in the range <tt>[first,
- *  last)</tt>. This version of \p reduce uses \c 0 as the initial value of the
- *  reduction. \p reduce is similar to the C++ Standard Template Library's
- *  <tt>std::accumulate</tt>. The primary difference between the two functions
- *  is that <tt>std::accumulate</tt> guarantees the order of summation, while
- *  \p reduce requires associativity of the binary operation to parallelize
- *  the reduction.
- *
- *  Note that \p reduce also assumes that the binary reduction operator (in this
- *  case operator+) is commutative.  If the reduction operator is not commutative
- *  then \p thrust::reduce should not be used.  Instead, one could use 
- *  \p inclusive_scan (which does not require commutativity) and select the
- *  last element of the output array.
- *
- *  \param first The beginning of the sequence.
- *  \param last The end of the sequence.
- *  \return The result of the reduction.
- *
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *          and if \c x and \c y are objects of \p InputIterator's \c value_type,
- *          then <tt>x + y</tt> is defined and is convertible to \p InputIterator's
- *          \c value_type. If \c T is \c InputIterator's \c value_type, then
- *          <tt>T(0)</tt> is defined.
- *
- *  The following code snippet demonstrates how to use \p reduce to compute
- *  the sum of a sequence of integers.
- *
- *  \code
- *  #include <thrust/reduce.h>
- *  ...
- *  int data[6] = {1, 0, 2, 2, 1, 3};
- *  int result = thrust::reduce(data, data + 6);
- *
- *  // result == 9
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/accumulate.html
- */
-template<typename InputIterator> typename
-  thrust::iterator_traits<InputIterator>::value_type reduce(InputIterator first, InputIterator last);
-
-
-/*! \p reduce is a generalization of summation: it computes the sum (or some
- *  other binary operation) of all the elements in the range <tt>[first,
- *  last)</tt>. This version of \p reduce uses \p init as the initial value of the
- *  reduction. \p reduce is similar to the C++ Standard Template Library's
- *  <tt>std::accumulate</tt>. The primary difference between the two functions
- *  is that <tt>std::accumulate</tt> guarantees the order of summation, while
- *  \p reduce requires associativity of the binary operation to parallelize
- *  the reduction.
- *
- *  Note that \p reduce also assumes that the binary reduction operator (in this
- *  case operator+) is commutative.  If the reduction operator is not commutative
- *  then \p thrust::reduce should not be used.  Instead, one could use 
- *  \p inclusive_scan (which does not require commutativity) and select the
- *  last element of the output array.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the input sequence.
- *  \param last The end of the input sequence.
- *  \param init The initial value.
- *  \return The result of the reduction.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *          and if \c x and \c y are objects of \p InputIterator's \c value_type,
- *          then <tt>x + y</tt> is defined and is convertible to \p T.
- *  \tparam T is convertible to \p InputIterator's \c value_type.
- *
- *  The following code snippet demonstrates how to use \p reduce to compute
- *  the sum of a sequence of integers including an intialization value using the \p thrust::host
- *  execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/reduce.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  int data[6] = {1, 0, 2, 2, 1, 3};
- *  int result = thrust::reduce(thrust::host, data, data + 6, 1);
- *
- *  // result == 10
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/accumulate.html
- */
-template<typename DerivedPolicy, typename InputIterator, typename T>
-  T reduce(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-           InputIterator first,
-           InputIterator last,
-           T init);
-
-
-/*! \p reduce is a generalization of summation: it computes the sum (or some
- *  other binary operation) of all the elements in the range <tt>[first,
- *  last)</tt>. This version of \p reduce uses \p init as the initial value of the
- *  reduction. \p reduce is similar to the C++ Standard Template Library's
- *  <tt>std::accumulate</tt>. The primary difference between the two functions
- *  is that <tt>std::accumulate</tt> guarantees the order of summation, while
- *  \p reduce requires associativity of the binary operation to parallelize
- *  the reduction.
- *
- *  Note that \p reduce also assumes that the binary reduction operator (in this
- *  case operator+) is commutative.  If the reduction operator is not commutative
- *  then \p thrust::reduce should not be used.  Instead, one could use 
- *  \p inclusive_scan (which does not require commutativity) and select the
- *  last element of the output array.
- *
- *  \param first The beginning of the input sequence.
- *  \param last The end of the input sequence.
- *  \param init The initial value.
- *  \return The result of the reduction.
- *
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *          and if \c x and \c y are objects of \p InputIterator's \c value_type,
- *          then <tt>x + y</tt> is defined and is convertible to \p T.
- *  \tparam T is convertible to \p InputIterator's \c value_type.
- *
- *  The following code snippet demonstrates how to use \p reduce to compute
- *  the sum of a sequence of integers including an intialization value.
- *
- *  \code
- *  #include <thrust/reduce.h>
- *  ...
- *  int data[6] = {1, 0, 2, 2, 1, 3};
- *  int result = thrust::reduce(data, data + 6, 1);
- *
- *  // result == 10
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/accumulate.html
- */
-template<typename InputIterator, typename T>
-  T reduce(InputIterator first,
-           InputIterator last,
-           T init);
-
-
-/*! \p reduce is a generalization of summation: it computes the sum (or some
- *  other binary operation) of all the elements in the range <tt>[first,
- *  last)</tt>. This version of \p reduce uses \p init as the initial value of the
- *  reduction and \p binary_op as the binary function used for summation. \p reduce
- *  is similar to the C++ Standard Template Library's <tt>std::accumulate</tt>.
- *  The primary difference between the two functions is that <tt>std::accumulate</tt>
- *  guarantees the order of summation, while \p reduce requires associativity of
- *  \p binary_op to parallelize the reduction.
- *
- *  Note that \p reduce also assumes that the binary reduction operator (in this
- *  case \p binary_op) is commutative.  If the reduction operator is not commutative
- *  then \p thrust::reduce should not be used.  Instead, one could use 
- *  \p inclusive_scan (which does not require commutativity) and select the
- *  last element of the output array.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the input sequence.
- *  \param last The end of the input sequence.
- *  \param init The initial value.
- *  \param binary_op The binary function used to 'sum' values.
- *  \return The result of the reduction.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *          and \c InputIterator's \c value_type is convertible to \c T.
- *  \tparam T is a model of <a href="http://www.sgi.com/tech/stl/Assignable.html">Assignable</a>,
- *          and is convertible to \p BinaryFunction's \c first_argument_type and \c second_argument_type.
- *  \tparam BinaryFunction is a model of <a href="http://www.sgi.com/tech/stl/BinaryFunction.html">Binary Function</a>,
- *          and \p BinaryFunction's \c result_type is convertible to \p OutputType.
- *
- *  The following code snippet demonstrates how to use \p reduce to
- *  compute the maximum value of a sequence of integers using the \p thrust::host execution policy
- *  for parallelization:
- *
- *  \code
- *  #include <thrust/reduce.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  int data[6] = {1, 0, 2, 2, 1, 3};
- *  int result = thrust::reduce(thrust::host,
- *                              data, data + 6,
- *                              -1,
- *                              thrust::maximum<int>());
- *  // result == 3
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/accumulate.html
- *  \see transform_reduce
- */
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename T,
-         typename BinaryFunction>
-  T reduce(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-           InputIterator first,
-           InputIterator last,
-           T init,
-           BinaryFunction binary_op);
-
-
-/*! \p reduce is a generalization of summation: it computes the sum (or some
- *  other binary operation) of all the elements in the range <tt>[first,
- *  last)</tt>. This version of \p reduce uses \p init as the initial value of the
- *  reduction and \p binary_op as the binary function used for summation. \p reduce
- *  is similar to the C++ Standard Template Library's <tt>std::accumulate</tt>.
- *  The primary difference between the two functions is that <tt>std::accumulate</tt>
- *  guarantees the order of summation, while \p reduce requires associativity of
- *  \p binary_op to parallelize the reduction.
- *
- *  Note that \p reduce also assumes that the binary reduction operator (in this
- *  case \p binary_op) is commutative.  If the reduction operator is not commutative
- *  then \p thrust::reduce should not be used.  Instead, one could use 
- *  \p inclusive_scan (which does not require commutativity) and select the
- *  last element of the output array.
- *
- *  \param first The beginning of the input sequence.
- *  \param last The end of the input sequence.
- *  \param init The initial value.
- *  \param binary_op The binary function used to 'sum' values.
- *  \return The result of the reduction.
- *
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *          and \c InputIterator's \c value_type is convertible to \c T.
- *  \tparam T is a model of <a href="http://www.sgi.com/tech/stl/Assignable.html">Assignable</a>,
- *          and is convertible to \p BinaryFunction's \c first_argument_type and \c second_argument_type.
- *  \tparam BinaryFunction is a model of <a href="http://www.sgi.com/tech/stl/BinaryFunction.html">Binary Function</a>,
- *          and \p BinaryFunction's \c result_type is convertible to \p OutputType.
- *
- *  The following code snippet demonstrates how to use \p reduce to
- *  compute the maximum value of a sequence of integers.
- *
- *  \code
- *  #include <thrust/reduce.h>
- *  #include <thrust/functional.h>
- *  ...
- *  int data[6] = {1, 0, 2, 2, 1, 3};
- *  int result = thrust::reduce(data, data + 6,
- *                              -1,
- *                              thrust::maximum<int>());
- *  // result == 3
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/accumulate.html
- *  \see transform_reduce
- */
-template<typename InputIterator,
-         typename T,
-         typename BinaryFunction>
-  T reduce(InputIterator first,
-           InputIterator last,
-           T init,
-           BinaryFunction binary_op);
-
-
-/*! \p reduce_by_key is a generalization of \p reduce to key-value pairs.
- *  For each group of consecutive keys in the range <tt>[keys_first, keys_last)</tt>
- *  that are equal, \p reduce_by_key copies the first element of the group to the
- *  \c keys_output. The corresponding values in the range are reduced using the
- *  \c plus and the result copied to \c values_output. 
- *
- *  This version of \p reduce_by_key uses the function object \c equal_to
- *  to test for equality and \c plus to reduce values with equal keys.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param keys_first The beginning of the input key range.
- *  \param keys_last  The end of the input key range.
- *  \param values_first The beginning of the input value range.
- *  \param keys_output The beginning of the output key range.
- *  \param values_output The beginning of the output value range.
- *  \return A pair of iterators at end of the ranges <tt>[keys_output, keys_output_last)</tt> and <tt>[values_output, values_output_last)</tt>.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *  \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a> and
- *          and \p InputIterator1's \c value_type is convertible to \c OutputIterator1's \c value_type.
- *  \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a> and
- *          and \p InputIterator2's \c value_type is convertible to \c OutputIterator2's \c value_type.
- *
- *  \pre The input ranges shall not overlap either output range.
- *
- *  The following code snippet demonstrates how to use \p reduce_by_key to
- *  compact a sequence of key/value pairs and sum values with equal keys using the \p thrust::host
- *  execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/reduce.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  const int N = 7;
- *  int A[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys
- *  int B[N] = {9, 8, 7, 6, 5, 4, 3}; // input values
- *  int C[N];                         // output keys
- *  int D[N];                         // output values
- *
- *  thrust::pair<int*,int*> new_end;
- *  new_end = thrust::reduce_by_key(thrust::host, A, A + N, B, C, D);
- *
- *  // The first four keys in C are now {1, 3, 2, 1} and new_end.first - C is 4.
- *  // The first four values in D are now {9, 21, 9, 3} and new_end.second - D is 4.
- *  \endcode
- *  
- *  \see reduce
- *  \see unique_copy
- *  \see unique_by_key
- *  \see unique_by_key_copy
- */
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator1,
-         typename OutputIterator2>
-  thrust::pair<OutputIterator1,OutputIterator2>
-  reduce_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                InputIterator1 keys_first, 
-                InputIterator1 keys_last,
-                InputIterator2 values_first,
-                OutputIterator1 keys_output,
-                OutputIterator2 values_output);
-
-
-/*! \p reduce_by_key is a generalization of \p reduce to key-value pairs.
- *  For each group of consecutive keys in the range <tt>[keys_first, keys_last)</tt>
- *  that are equal, \p reduce_by_key copies the first element of the group to the
- *  \c keys_output. The corresponding values in the range are reduced using the
- *  \c plus and the result copied to \c values_output. 
- *
- *  This version of \p reduce_by_key uses the function object \c equal_to
- *  to test for equality and \c plus to reduce values with equal keys.
- *
- *  \param keys_first The beginning of the input key range.
- *  \param keys_last  The end of the input key range.
- *  \param values_first The beginning of the input value range.
- *  \param keys_output The beginning of the output key range.
- *  \param values_output The beginning of the output value range.
- *  \return A pair of iterators at end of the ranges <tt>[keys_output, keys_output_last)</tt> and <tt>[values_output, values_output_last)</tt>.
- *
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *  \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a> and
- *          and \p InputIterator1's \c value_type is convertible to \c OutputIterator1's \c value_type.
- *  \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a> and
- *          and \p InputIterator2's \c value_type is convertible to \c OutputIterator2's \c value_type.
- *
- *  \pre The input ranges shall not overlap either output range.
- *
- *  The following code snippet demonstrates how to use \p reduce_by_key to
- *  compact a sequence of key/value pairs and sum values with equal keys.
- *
- *  \code
- *  #include <thrust/reduce.h>
- *  ...
- *  const int N = 7;
- *  int A[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys
- *  int B[N] = {9, 8, 7, 6, 5, 4, 3}; // input values
- *  int C[N];                         // output keys
- *  int D[N];                         // output values
- *
- *  thrust::pair<int*,int*> new_end;
- *  new_end = thrust::reduce_by_key(A, A + N, B, C, D);
- *
- *  // The first four keys in C are now {1, 3, 2, 1} and new_end.first - C is 4.
- *  // The first four values in D are now {9, 21, 9, 3} and new_end.second - D is 4.
- *  \endcode
- *  
- *  \see reduce
- *  \see unique_copy
- *  \see unique_by_key
- *  \see unique_by_key_copy
- */
-template <typename InputIterator1,
-          typename InputIterator2,
-          typename OutputIterator1,
-          typename OutputIterator2>
-  thrust::pair<OutputIterator1,OutputIterator2>
-  reduce_by_key(InputIterator1 keys_first, 
-                InputIterator1 keys_last,
-                InputIterator2 values_first,
-                OutputIterator1 keys_output,
-                OutputIterator2 values_output);
-
-
-/*! \p reduce_by_key is a generalization of \p reduce to key-value pairs.
- *  For each group of consecutive keys in the range <tt>[keys_first, keys_last)</tt>
- *  that are equal, \p reduce_by_key copies the first element of the group to the
- *  \c keys_output. The corresponding values in the range are reduced using the
- *  \c plus and the result copied to \c values_output. 
- *
- *  This version of \p reduce_by_key uses the function object \c binary_pred
- *  to test for equality and \c plus to reduce values with equal keys.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param keys_first The beginning of the input key range.
- *  \param keys_last  The end of the input key range.
- *  \param values_first The beginning of the input value range.
- *  \param keys_output The beginning of the output key range.
- *  \param values_output The beginning of the output value range.
- *  \param binary_pred  The binary predicate used to determine equality.
- *  \return A pair of iterators at end of the ranges <tt>[keys_output, keys_output_last)</tt> and <tt>[values_output, values_output_last)</tt>.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *  \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a> and
- *          and \p InputIterator1's \c value_type is convertible to \c OutputIterator1's \c value_type.
- *  \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a> and
- *          and \p InputIterator2's \c value_type is convertible to \c OutputIterator2's \c value_type.
- *  \tparam BinaryPredicate is a model of <a href="http://www.sgi.com/tech/stl/BinaryPredicate.html">Binary Predicate</a>.
- *
- *  \pre The input ranges shall not overlap either output range.
- *
- *  The following code snippet demonstrates how to use \p reduce_by_key to
- *  compact a sequence of key/value pairs and sum values with equal keys using the \p thrust::host
- *  execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/reduce.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  const int N = 7;
- *  int A[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys
- *  int B[N] = {9, 8, 7, 6, 5, 4, 3}; // input values
- *  int C[N];                         // output keys
- *  int D[N];                         // output values
- *
- *  thrust::pair<int*,int*> new_end;
- *  thrust::equal_to<int> binary_pred;
- *  new_end = thrust::reduce_by_key(thrust::host, A, A + N, B, C, D, binary_pred);
- *
- *  // The first four keys in C are now {1, 3, 2, 1} and new_end.first - C is 4.
- *  // The first four values in D are now {9, 21, 9, 3} and new_end.second - D is 4.
- *  \endcode
- *  
- *  \see reduce
- *  \see unique_copy
- *  \see unique_by_key
- *  \see unique_by_key_copy
- */
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename BinaryPredicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-  reduce_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                InputIterator1 keys_first, 
-                InputIterator1 keys_last,
-                InputIterator2 values_first,
-                OutputIterator1 keys_output,
-                OutputIterator2 values_output,
-                BinaryPredicate binary_pred);
-
-
-/*! \p reduce_by_key is a generalization of \p reduce to key-value pairs.
- *  For each group of consecutive keys in the range <tt>[keys_first, keys_last)</tt>
- *  that are equal, \p reduce_by_key copies the first element of the group to the
- *  \c keys_output. The corresponding values in the range are reduced using the
- *  \c plus and the result copied to \c values_output. 
- *
- *  This version of \p reduce_by_key uses the function object \c binary_pred
- *  to test for equality and \c plus to reduce values with equal keys.
- *
- *  \param keys_first The beginning of the input key range.
- *  \param keys_last  The end of the input key range.
- *  \param values_first The beginning of the input value range.
- *  \param keys_output The beginning of the output key range.
- *  \param values_output The beginning of the output value range.
- *  \param binary_pred  The binary predicate used to determine equality.
- *  \return A pair of iterators at end of the ranges <tt>[keys_output, keys_output_last)</tt> and <tt>[values_output, values_output_last)</tt>.
- *
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *  \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a> and
- *          and \p InputIterator1's \c value_type is convertible to \c OutputIterator1's \c value_type.
- *  \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a> and
- *          and \p InputIterator2's \c value_type is convertible to \c OutputIterator2's \c value_type.
- *  \tparam BinaryPredicate is a model of <a href="http://www.sgi.com/tech/stl/BinaryPredicate.html">Binary Predicate</a>.
- *
- *  \pre The input ranges shall not overlap either output range.
- *
- *  The following code snippet demonstrates how to use \p reduce_by_key to
- *  compact a sequence of key/value pairs and sum values with equal keys.
- *
- *  \code
- *  #include <thrust/reduce.h>
- *  ...
- *  const int N = 7;
- *  int A[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys
- *  int B[N] = {9, 8, 7, 6, 5, 4, 3}; // input values
- *  int C[N];                         // output keys
- *  int D[N];                         // output values
- *
- *  thrust::pair<int*,int*> new_end;
- *  thrust::equal_to<int> binary_pred;
- *  new_end = thrust::reduce_by_key(A, A + N, B, C, D, binary_pred);
- *
- *  // The first four keys in C are now {1, 3, 2, 1} and new_end.first - C is 4.
- *  // The first four values in D are now {9, 21, 9, 3} and new_end.second - D is 4.
- *  \endcode
- *  
- *  \see reduce
- *  \see unique_copy
- *  \see unique_by_key
- *  \see unique_by_key_copy
- */
-template <typename InputIterator1,
-          typename InputIterator2,
-          typename OutputIterator1,
-          typename OutputIterator2,
-          typename BinaryPredicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-  reduce_by_key(InputIterator1 keys_first, 
-                InputIterator1 keys_last,
-                InputIterator2 values_first,
-                OutputIterator1 keys_output,
-                OutputIterator2 values_output,
-                BinaryPredicate binary_pred);
-
-
-/*! \p reduce_by_key is a generalization of \p reduce to key-value pairs.
- *  For each group of consecutive keys in the range <tt>[keys_first, keys_last)</tt>
- *  that are equal, \p reduce_by_key copies the first element of the group to the
- *  \c keys_output. The corresponding values in the range are reduced using the
- *  \c BinaryFunction \c binary_op and the result copied to \c values_output. 
- *  Specifically, if consecutive key iterators \c i and \c (i + 1) are 
- *  such that <tt>binary_pred(*i, *(i+1))</tt> is \c true, then the corresponding
- *  values are reduced to a single value with \c binary_op.
- *
- *  This version of \p reduce_by_key uses the function object \c binary_pred
- *  to test for equality and \c binary_op to reduce values with equal keys.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param keys_first The beginning of the input key range.
- *  \param keys_last  The end of the input key range.
- *  \param values_first The beginning of the input value range.
- *  \param keys_output The beginning of the output key range.
- *  \param values_output The beginning of the output value range.
- *  \param binary_pred  The binary predicate used to determine equality.
- *  \param binary_op The binary function used to accumulate values.
- *  \return A pair of iterators at end of the ranges <tt>[keys_output, keys_output_last)</tt> and <tt>[values_output, values_output_last)</tt>.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *  \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a> and
- *          and \p InputIterator1's \c value_type is convertible to \c OutputIterator1's \c value_type.
- *  \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a> and
- *          and \p InputIterator2's \c value_type is convertible to \c OutputIterator2's \c value_type.
- *  \tparam BinaryPredicate is a model of <a href="http://www.sgi.com/tech/stl/BinaryPredicate.html">Binary Predicate</a>.
- *  \tparam BinaryFunction is a model of <a href="http://www.sgi.com/tech/stl/BinaryFunction.html">Binary Function</a>
- *          and \c BinaryFunction's \c result_type is convertible to \c OutputIterator2's \c value_type.
- *
- *  \pre The input ranges shall not overlap either output range.
- *
- *  The following code snippet demonstrates how to use \p reduce_by_key to
- *  compact a sequence of key/value pairs and sum values with equal keys using the \p thrust::host
- *  execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/reduce.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  const int N = 7;
- *  int A[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys
- *  int B[N] = {9, 8, 7, 6, 5, 4, 3}; // input values
- *  int C[N];                         // output keys
- *  int D[N];                         // output values
- *
- *  thrust::pair<int*,int*> new_end;
- *  thrust::equal_to<int> binary_pred;
- *  thrust::plus<int> binary_op;
- *  new_end = thrust::reduce_by_key(thrust::host, A, A + N, B, C, D, binary_pred, binary_op);
- *
- *  // The first four keys in C are now {1, 3, 2, 1} and new_end.first - C is 4.
- *  // The first four values in D are now {9, 21, 9, 3} and new_end.second - D is 4.
- *  \endcode
- *  
- *  \see reduce
- *  \see unique_copy
- *  \see unique_by_key
- *  \see unique_by_key_copy
- */
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename BinaryPredicate,
-         typename BinaryFunction>
-  thrust::pair<OutputIterator1,OutputIterator2>
-  reduce_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                InputIterator1 keys_first, 
-                InputIterator1 keys_last,
-                InputIterator2 values_first,
-                OutputIterator1 keys_output,
-                OutputIterator2 values_output,
-                BinaryPredicate binary_pred,
-                BinaryFunction binary_op);
-
-
-/*! \p reduce_by_key is a generalization of \p reduce to key-value pairs.
- *  For each group of consecutive keys in the range <tt>[keys_first, keys_last)</tt>
- *  that are equal, \p reduce_by_key copies the first element of the group to the
- *  \c keys_output. The corresponding values in the range are reduced using the
- *  \c BinaryFunction \c binary_op and the result copied to \c values_output. 
- *  Specifically, if consecutive key iterators \c i and \c (i + 1) are 
- *  such that <tt>binary_pred(*i, *(i+1))</tt> is \c true, then the corresponding
- *  values are reduced to a single value with \c binary_op.
- *
- *  This version of \p reduce_by_key uses the function object \c binary_pred
- *  to test for equality and \c binary_op to reduce values with equal keys.
- *
- *  \param keys_first The beginning of the input key range.
- *  \param keys_last  The end of the input key range.
- *  \param values_first The beginning of the input value range.
- *  \param keys_output The beginning of the output key range.
- *  \param values_output The beginning of the output value range.
- *  \param binary_pred  The binary predicate used to determine equality.
- *  \param binary_op The binary function used to accumulate values.
- *  \return A pair of iterators at end of the ranges <tt>[keys_output, keys_output_last)</tt> and <tt>[values_output, values_output_last)</tt>.
- *
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *  \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a> and
- *          and \p InputIterator1's \c value_type is convertible to \c OutputIterator1's \c value_type.
- *  \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a> and
- *          and \p InputIterator2's \c value_type is convertible to \c OutputIterator2's \c value_type.
- *  \tparam BinaryPredicate is a model of <a href="http://www.sgi.com/tech/stl/BinaryPredicate.html">Binary Predicate</a>.
- *  \tparam BinaryFunction is a model of <a href="http://www.sgi.com/tech/stl/BinaryFunction.html">Binary Function</a>
- *          and \c BinaryFunction's \c result_type is convertible to \c OutputIterator2's \c value_type.
- *
- *  \pre The input ranges shall not overlap either output range.
- *
- *  The following code snippet demonstrates how to use \p reduce_by_key to
- *  compact a sequence of key/value pairs and sum values with equal keys.
- *
- *  \code
- *  #include <thrust/reduce.h>
- *  ...
- *  const int N = 7;
- *  int A[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys
- *  int B[N] = {9, 8, 7, 6, 5, 4, 3}; // input values
- *  int C[N];                         // output keys
- *  int D[N];                         // output values
- *
- *  thrust::pair<int*,int*> new_end;
- *  thrust::equal_to<int> binary_pred;
- *  thrust::plus<int> binary_op;
- *  new_end = thrust::reduce_by_key(A, A + N, B, C, D, binary_pred, binary_op);
- *
- *  // The first four keys in C are now {1, 3, 2, 1} and new_end.first - C is 4.
- *  // The first four values in D are now {9, 21, 9, 3} and new_end.second - D is 4.
- *  \endcode
- *  
- *  \see reduce
- *  \see unique_copy
- *  \see unique_by_key
- *  \see unique_by_key_copy
- */
-template <typename InputIterator1,
-          typename InputIterator2,
-          typename OutputIterator1,
-          typename OutputIterator2,
-          typename BinaryPredicate,
-          typename BinaryFunction>
-  thrust::pair<OutputIterator1,OutputIterator2>
-  reduce_by_key(InputIterator1 keys_first, 
-                InputIterator1 keys_last,
-                InputIterator2 values_first,
-                OutputIterator1 keys_output,
-                OutputIterator2 values_output,
-                BinaryPredicate binary_pred,
-                BinaryFunction binary_op);
-
-
-/*! \} // end reductions
- */
-
-
-} // end namespace thrust
-
-#include <thrust/detail/reduce.inl>
-
diff --git a/compat/thrust/remove.h b/compat/thrust/remove.h
deleted file mode 100644
index c538776258..0000000000
--- a/compat/thrust/remove.h
+++ /dev/null
@@ -1,800 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file remove.h
- *  \brief Functions for removing elements from a range
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/execution_policy.h>
-
-namespace thrust
-{
-
-
-/*! \addtogroup stream_compaction Stream Compaction
- *  \ingroup reordering
- *  \{
- *
- */
-
-
-/*! \p remove removes from the range <tt>[first, last)</tt> all elements that are
- *  equal to \p value. That is, \p remove returns an iterator \p new_last such
- *  that the range <tt>[first, new_last)</tt> contains no elements equal to
- *  \p value. The iterators in the range <tt>[new_first,last)</tt> are all still
- *  dereferenceable, but the elements that they point to are unspecified. \p remove
- *  is stable, meaning that the relative order of elements that are not equal to
- *  \p value is unchanged.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the range of interest.
- *  \param last The end of the range of interest.
- *  \param value The value to remove from the range <tt>[first, last)</tt>.
- *         Elements which are equal to value are removed from the sequence.
- *  \return A \p ForwardIterator pointing to the end of the resulting range of
- *          elements which are not equal to \p value.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \p ForwardIterator is mutable.
- *  \tparam T is a model of <a href="http://www.sgi.com/tech/stl/EqualityComparable.html">Equality Comparable</a>,
- *          and objects of type \p T can be compared for equality with objects of \p ForwardIterator's \c value_type.
- *
- *  The following code snippet demonstrates how to use \p remove to remove a number
- *  of interest from a range using the \p thrust::host execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/remove.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  const int N = 6;
- *  int A[N] = {3, 1, 4, 1, 5, 9};
- *  int *new_end = thrust::remove(A, A + N, 1);
- *  // The first four values of A are now {3, 4, 5, 9}
- *  // Values beyond new_end are unspecified
- *  \endcode
- *
- *  \note The meaning of "removal" is somewhat subtle. \p remove does not destroy any
- *  iterators, and does not change the distance between \p first and \p last.
- *  (There's no way that it could do anything of the sort.) So, for example, if
- *  \c V is a device_vector, <tt>remove(V.begin(), V.end(), 0)</tt> does not
- *  change <tt>V.size()</tt>: \c V will contain just as many elements as it did
- *  before. \p remove returns an iterator that points to the end of the resulting
- *  range after elements have been removed from it; it follows that the elements
- *  after that iterator are of no interest, and may be discarded. If you are
- *  removing elements from a
- *  <a href="http://www.sgi.com/tech/stl/Sequence.html">Sequence</a>, you may
- *  simply erase them. That is, a reasonable way of removing elements from a
- *  <a href="http://www.sgi.com/tech/stl/Sequence.html">Sequence</a> is
- *  <tt>S.erase(remove(S.begin(), S.end(), x), S.end())</tt>.
- *
- *  \see http://www.sgi.com/tech/stl/remove.html
- *  \see remove_if
- *  \see remove_copy
- *  \see remove_copy_if
- */
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename T>
-  ForwardIterator remove(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                         ForwardIterator first,
-                         ForwardIterator last,
-                         const T &value);
-
-
-/*! \p remove removes from the range <tt>[first, last)</tt> all elements that are
- *  equal to \p value. That is, \p remove returns an iterator \p new_last such
- *  that the range <tt>[first, new_last)</tt> contains no elements equal to
- *  \p value. The iterators in the range <tt>[new_first,last)</tt> are all still
- *  dereferenceable, but the elements that they point to are unspecified. \p remove
- *  is stable, meaning that the relative order of elements that are not equal to
- *  \p value is unchanged.
- *
- *  \param first The beginning of the range of interest.
- *  \param last The end of the range of interest.
- *  \param value The value to remove from the range <tt>[first, last)</tt>.
- *         Elements which are equal to value are removed from the sequence.
- *  \return A \p ForwardIterator pointing to the end of the resulting range of
- *          elements which are not equal to \p value.
- *
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \p ForwardIterator is mutable.
- *  \tparam T is a model of <a href="http://www.sgi.com/tech/stl/EqualityComparable.html">Equality Comparable</a>,
- *          and objects of type \p T can be compared for equality with objects of \p ForwardIterator's \c value_type.
- *
- *  The following code snippet demonstrates how to use \p remove to remove a number
- *  of interest from a range.
- *
- *  \code
- *  #include <thrust/remove.h>
- *  ...
- *  const int N = 6;
- *  int A[N] = {3, 1, 4, 1, 5, 9};
- *  int *new_end = thrust::remove(A, A + N, 1);
- *  // The first four values of A are now {3, 4, 5, 9}
- *  // Values beyond new_end are unspecified
- *  \endcode
- *
- *  \note The meaning of "removal" is somewhat subtle. \p remove does not destroy any
- *  iterators, and does not change the distance between \p first and \p last.
- *  (There's no way that it could do anything of the sort.) So, for example, if
- *  \c V is a device_vector, <tt>remove(V.begin(), V.end(), 0)</tt> does not
- *  change <tt>V.size()</tt>: \c V will contain just as many elements as it did
- *  before. \p remove returns an iterator that points to the end of the resulting
- *  range after elements have been removed from it; it follows that the elements
- *  after that iterator are of no interest, and may be discarded. If you are
- *  removing elements from a
- *  <a href="http://www.sgi.com/tech/stl/Sequence.html">Sequence</a>, you may
- *  simply erase them. That is, a reasonable way of removing elements from a
- *  <a href="http://www.sgi.com/tech/stl/Sequence.html">Sequence</a> is
- *  <tt>S.erase(remove(S.begin(), S.end(), x), S.end())</tt>.
- *
- *  \see http://www.sgi.com/tech/stl/remove.html
- *  \see remove_if
- *  \see remove_copy
- *  \see remove_copy_if
- */
-template<typename ForwardIterator,
-         typename T>
-  ForwardIterator remove(ForwardIterator first,
-                         ForwardIterator last,
-                         const T &value);
-
-
-/*! \p remove_copy copies elements that are not equal to \p value from the range
- *  <tt>[first, last)</tt> to a range beginning at \p result. The return value is
- *  the end of the resulting range. This operation is stable, meaning that the
- *  relative order of the elements that are copied is the same as in
- *  the range <tt>[first, last)</tt>.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the range of interest.
- *  \param last The end of the range of interest.
- *  \param result The resulting range is copied to the sequence beginning at this
- *                location.
- *  \param value The value to omit from the copied range.
- *  \return An OutputIterator pointing to the end of the resulting range of elements
- *          which are not equal to \p value.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam T is a model of <a href="http://www.sgi.com/tech/stl/EqualityComparable">Equality Comparable</a>,
- *          and objects of type \p T can be compared for equality with objects of \p InputIterator's \c value_type.
- *
- *  \pre The range <tt>[first, last)</tt> shall not overlap the range <tt>[result, result + (last - first))</tt>.
- *
- *  The following code snippet demonstrates how to use \p remove_copy to copy
- *  a sequence of numbers to an output range while omitting a value of interest using the \p thrust::host
- *  execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/remove.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  const int N = 6;
- *  int V[N] = {-2, 0, -1, 0, 1, 2};
- *  int result[N-2];
- *  thrust::remove_copy(thrust::host, V, V + N, result, 0);
- *  // V remains {-2, 0, -1, 0, 1, 2}
- *  // result is now {-2, -1, 1, 2}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/remove_copy.html
- *  \see remove
- *  \see remove_if
- *  \see remove_copy_if
- */
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename T>
-  OutputIterator remove_copy(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                             InputIterator first,
-                             InputIterator last,
-                             OutputIterator result,
-                             const T &value);
-
-
-/*! \p remove_copy copies elements that are not equal to \p value from the range
- *  <tt>[first, last)</tt> to a range beginning at \p result. The return value is
- *  the end of the resulting range. This operation is stable, meaning that the
- *  relative order of the elements that are copied is the same as in
- *  the range <tt>[first, last)</tt>.
- *
- *  \param first The beginning of the range of interest.
- *  \param last The end of the range of interest.
- *  \param result The resulting range is copied to the sequence beginning at this
- *                location.
- *  \param value The value to omit from the copied range.
- *  \return An OutputIterator pointing to the end of the resulting range of elements
- *          which are not equal to \p value.
- *
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam T is a model of <a href="http://www.sgi.com/tech/stl/EqualityComparable">Equality Comparable</a>,
- *          and objects of type \p T can be compared for equality with objects of \p InputIterator's \c value_type.
- *
- *  \pre The range <tt>[first, last)</tt> shall not overlap the range <tt>[result, result + (last - first))</tt>.
- *
- *  The following code snippet demonstrates how to use \p remove_copy to copy
- *  a sequence of numbers to an output range while omitting a value of interest.
- *
- *  \code
- *  #include <thrust/remove.h>
- *  ...
- *  const int N = 6;
- *  int V[N] = {-2, 0, -1, 0, 1, 2};
- *  int result[N-2];
- *  thrust::remove_copy(V, V + N, result, 0);
- *  // V remains {-2, 0, -1, 0, 1, 2}
- *  // result is now {-2, -1, 1, 2}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/remove_copy.html
- *  \see remove
- *  \see remove_if
- *  \see remove_copy_if
- */
-template<typename InputIterator,
-         typename OutputIterator,
-         typename T>
-  OutputIterator remove_copy(InputIterator first,
-                             InputIterator last,
-                             OutputIterator result,
-                             const T &value);
-
-
-/*! \p remove_if removes from the range <tt>[first, last)</tt> every element \p x
- *  such that <tt>pred(x)</tt> is \c true. That is, \p remove_if returns an
- *  iterator \c new_last such that the range <tt>[first,new_last)</tt> contains
- *  no elements for which \p pred is \c true. The iterators in the range
- *  <tt>[new_last,last)</tt> are all still dereferenceable, but the elements that
- *  they point to are unspecified. \p remove_if is stable, meaning that the
- *  relative order of elements that are not removed is unchanged.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the range of interest.
- *  \param last The end of the range of interest.
- *  \param pred A predicate to evaluate for each element of the range
- *              <tt>[first,last)</tt>. Elements for which \p pred evaluates to
- *              \c false are removed from the sequence.
- *  \return A ForwardIterator pointing to the end of the resulting range of
- *          elements for which \p pred evaluated to \c true.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/ForwardIterator.html">Forward Iterator</a>,
- *          \p ForwardIterator is mutable,
- *          and \p ForwardIterator's \c value_type is convertible to \p Predicate's \c argument_type.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/Predicate.html">Predicate</a>.
- *
- *  The following code snippet demonstrates how to use \p remove_if to remove
- *  all even numbers from an array of integers using the \p thrust::host execution policy for
- *  parallelization:
- *
- *  \code
- *  #include <thrust/remove.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  struct is_even
- *  {
- *    __host__ __device__
- *    bool operator()(const int x)
- *    {
- *      return (x % 2) == 0;
- *    }
- *  };
- *  ...
- *  const int N = 6;
- *  int A[N] = {1, 4, 2, 8, 5, 7};
- *  int *new_end = thrust::remove_if(thrust::host, A, A + N, is_even());
- *  // The first three values of A are now {1, 5, 7}
- *  // Values beyond new_end are unspecified
- *  \endcode
- *
- *  \note The meaning of "removal" is somewhat subtle. \p remove_if does not
- *  destroy any iterators, and does not change the distance between \p first and
- *  \p last. (There's no way that it could do anything of the sort.) So, for
- *  example, if \c V is a device_vector,
- *  <tt>remove_if(V.begin(), V.end(), pred)</tt> does not change
- *  <tt>V.size()</tt>: \c V will contain just as many elements as it did before.
- *  \p remove_if returns an iterator that points to the end of the resulting
- *  range after elements have been removed from it; it follows that the elements
- *  after that iterator are of no interest, and may be discarded. If you are
- *  removing elements from a
- *  <a href="http://www.sgi.com/tech/stl/Sequence.html">Sequence</a>, you may
- *  simply erase them. That is, a reasonable way of removing elements from a
- *  <a href="http://www.sgi.com/tech/stl/Sequence.html">Sequence</a> is
- *  <tt>S.erase(remove_if(S.begin(), S.end(), pred), S.end())</tt>.
- *
- *  \see http://www.sgi.com/tech/stl/remove_if.html
- *  \see remove
- *  \see remove_copy
- *  \see remove_copy_if
- */
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename Predicate>
-  ForwardIterator remove_if(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                            ForwardIterator first,
-                            ForwardIterator last,
-                            Predicate pred);
-
-
-/*! \p remove_if removes from the range <tt>[first, last)</tt> every element \p x
- *  such that <tt>pred(x)</tt> is \c true. That is, \p remove_if returns an
- *  iterator \c new_last such that the range <tt>[first,new_last)</tt> contains
- *  no elements for which \p pred is \c true. The iterators in the range
- *  <tt>[new_last,last)</tt> are all still dereferenceable, but the elements that
- *  they point to are unspecified. \p remove_if is stable, meaning that the
- *  relative order of elements that are not removed is unchanged.
- *
- *  \param first The beginning of the range of interest.
- *  \param last The end of the range of interest.
- *  \param pred A predicate to evaluate for each element of the range
- *              <tt>[first,last)</tt>. Elements for which \p pred evaluates to
- *              \c true are removed from the sequence.
- *  \return A ForwardIterator pointing to the end of the resulting range of
- *          elements for which \p pred evaluated to \c false.
- *
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/ForwardIterator.html">Forward Iterator</a>,
- *          \p ForwardIterator is mutable,
- *          and \p ForwardIterator's \c value_type is convertible to \p Predicate's \c argument_type.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/Predicate.html">Predicate</a>.
- *
- *  The following code snippet demonstrates how to use \p remove_if to remove
- *  all even numbers from an array of integers.
- *
- *  \code
- *  #include <thrust/remove.h>
- *  ...
- *  struct is_even
- *  {
- *    __host__ __device__
- *    bool operator()(const int x)
- *    {
- *      return (x % 2) == 0;
- *    }
- *  };
- *  ...
- *  const int N = 6;
- *  int A[N] = {1, 4, 2, 8, 5, 7};
- *  int *new_end = thrust::remove_if(A, A + N, is_even());
- *  // The first three values of A are now {1, 5, 7}
- *  // Values beyond new_end are unspecified
- *  \endcode
- *
- *  \note The meaning of "removal" is somewhat subtle. \p remove_if does not
- *  destroy any iterators, and does not change the distance between \p first and
- *  \p last. (There's no way that it could do anything of the sort.) So, for
- *  example, if \c V is a device_vector,
- *  <tt>remove_if(V.begin(), V.end(), pred)</tt> does not change
- *  <tt>V.size()</tt>: \c V will contain just as many elements as it did before.
- *  \p remove_if returns an iterator that points to the end of the resulting
- *  range after elements have been removed from it; it follows that the elements
- *  after that iterator are of no interest, and may be discarded. If you are
- *  removing elements from a
- *  <a href="http://www.sgi.com/tech/stl/Sequence.html">Sequence</a>, you may
- *  simply erase them. That is, a reasonable way of removing elements from a
- *  <a href="http://www.sgi.com/tech/stl/Sequence.html">Sequence</a> is
- *  <tt>S.erase(remove_if(S.begin(), S.end(), pred), S.end())</tt>.
- *
- *  \see http://www.sgi.com/tech/stl/remove_if.html
- *  \see remove
- *  \see remove_copy
- *  \see remove_copy_if
- */
-template<typename ForwardIterator,
-         typename Predicate>
-  ForwardIterator remove_if(ForwardIterator first,
-                            ForwardIterator last,
-                            Predicate pred);
-
-
-/*! \p remove_copy_if copies elements from the range <tt>[first,last)</tt> to a
- *  range beginning at \p result, except that elements for which \p pred is
- *  \c true are not copied. The return value is the end of the resulting range.
- *  This operation is stable, meaning that the relative order of the elements that
- *  are copied is the same as the range <tt>[first,last)</tt>.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the range of interest.
- *  \param last The end of the range of interest.
- *  \param result The resulting range is copied to the sequence beginning at this
- *                location.
- *  \param pred A predicate to evaluate for each element of the range <tt>[first,last)</tt>.
- *              Elements for which \p pred evaluates to \c true are not copied
- *              to the resulting sequence.
- *  \return An OutputIterator pointing to the end of the resulting range.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types,
- *          and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *
- *  \pre The range <tt>[first, last)</tt> shall not overlap the range <tt>[result, result + (last - first))</tt>.
- *
- *  The following code snippet demonstrates how to use \p remove_copy_if to copy
- *  a sequence of numbers to an output range while omitting even numbers using the \p thrust::host
- *  execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/remove.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  struct is_even
- *  {
- *    __host__ __device__
- *    bool operator()(const int x)
- *    {
- *      return (x % 2) == 0;
- *    }
- *  };
- *  ...
- *  const int N = 6;
- *  int V[N] = {-2, 0, -1, 0, 1, 2};
- *  int result[2];
- *  thrust::remove_copy_if(thrust::host, V, V + N, result, is_even());
- *  // V remains {-2, 0, -1, 0, 1, 2}
- *  // result is now {-1, 1}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/remove_copy_if.html
- *  \see remove
- *  \see remove_copy
- *  \see remove_if
- */
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename Predicate>
-  OutputIterator remove_copy_if(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                InputIterator first,
-                                InputIterator last,
-                                OutputIterator result,
-                                Predicate pred);
-
-
-/*! \p remove_copy_if copies elements from the range <tt>[first,last)</tt> to a
- *  range beginning at \p result, except that elements for which \p pred is
- *  \c true are not copied. The return value is the end of the resulting range.
- *  This operation is stable, meaning that the relative order of the elements that
- *  are copied is the same as the range <tt>[first,last)</tt>.
- *
- *  \param first The beginning of the range of interest.
- *  \param last The end of the range of interest.
- *  \param result The resulting range is copied to the sequence beginning at this
- *                location.
- *  \param pred A predicate to evaluate for each element of the range <tt>[first,last)</tt>.
- *              Elements for which \p pred evaluates to \c true are not copied
- *              to the resulting sequence.
- *  \return An OutputIterator pointing to the end of the resulting range.
- *
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types,
- *          and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *
- *  \pre The range <tt>[first, last)</tt> shall not overlap the range <tt>[result, result + (last - first))</tt>.
- *
- *  The following code snippet demonstrates how to use \p remove_copy_if to copy
- *  a sequence of numbers to an output range while omitting even numbers.
- *
- *  \code
- *  #include <thrust/remove.h>
- *  ...
- *  struct is_even
- *  {
- *    __host__ __device__
- *    bool operator()(const int x)
- *    {
- *      return (x % 2) == 0;
- *    }
- *  };
- *  ...
- *  const int N = 6;
- *  int V[N] = {-2, 0, -1, 0, 1, 2};
- *  int result[2];
- *  thrust::remove_copy_if(V, V + N, result, is_even());
- *  // V remains {-2, 0, -1, 0, 1, 2}
- *  // result is now {-1, 1}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/remove_copy_if.html
- *  \see remove
- *  \see remove_copy
- *  \see remove_if
- */
-template<typename InputIterator,
-         typename OutputIterator,
-         typename Predicate>
-  OutputIterator remove_copy_if(InputIterator first,
-                                InputIterator last,
-                                OutputIterator result,
-                                Predicate pred);
-
-
-/*! \p remove_if removes from the range <tt>[first, last)</tt> every element \p x
- *  such that <tt>pred(x)</tt> is \c true. That is, \p remove_if returns an
- *  iterator \c new_last such that the range <tt>[first, new_last)</tt> contains
- *  no elements for which \p pred of the corresponding stencil value is \c true. 
- *  The iterators in the range <tt>[new_last,last)</tt> are all still dereferenceable,
- *  but the elements that they point to are unspecified. \p remove_if is stable,
- *  meaning that the relative order of elements that are not removed is unchanged.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the range of interest.
- *  \param last The end of the range of interest.
- *  \param stencil The beginning of the stencil sequence.
- *  \param pred A predicate to evaluate for each element of the range
- *              <tt>[stencil, stencil + (last - first))</tt>. Elements whose stencil value makes
- *              \p pred evaluate to \c true are removed from the sequence <tt>[first, last)</tt>.
- *  \return A ForwardIterator pointing to the end of the resulting range of
- *          elements for which \p pred of the corresponding stencil value evaluated to \c false.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/ForwardIterator.html">Forward Iterator</a>
- *          and \p ForwardIterator is mutable.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/Predicate.html">Predicate</a>.
- *
- *  \pre The range <tt>[first, last)</tt> shall not overlap the range
- *       <tt>[stencil, stencil + (last - first))</tt>.
- *
- *  The following code snippet demonstrates how to use \p remove_if to remove
- *  specific elements from an array of integers using the \p thrust::host execution policy for
- *  parallelization:
- *
- *  \code
- *  #include <thrust/remove.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  const int N = 6;
- *  int A[N] = {1, 4, 2, 8, 5, 7};
- *  int S[N] = {0, 1, 1, 1, 0, 0};
- *
- *  int *new_end = thrust::remove_if(thrust::host, A, A + N, S, thrust::identity<int>());
- *  // The first three values of A are now {1, 5, 7}
- *  // Values beyond new_end are unspecified
- *  \endcode
- *
- *  \note The range <tt>[first, last)</tt> is not permitted to overlap with the range <tt>[stencil, stencil + (last - first))</tt>.
- *
- *  \see http://www.sgi.com/tech/stl/remove_if.html
- *  \see remove
- *  \see remove_copy
- *  \see remove_copy_if
- */
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename InputIterator,
-         typename Predicate>
-  ForwardIterator remove_if(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                            ForwardIterator first,
-                            ForwardIterator last,
-                            InputIterator stencil,
-                            Predicate pred);
-
-
-/*! \p remove_if removes from the range <tt>[first, last)</tt> every element \p x
- *  such that <tt>pred(x)</tt> is \c true. That is, \p remove_if returns an
- *  iterator \c new_last such that the range <tt>[first, new_last)</tt> contains
- *  no elements for which \p pred of the corresponding stencil value is \c true. 
- *  The iterators in the range <tt>[new_last,last)</tt> are all still dereferenceable,
- *  but the elements that they point to are unspecified. \p remove_if is stable,
- *  meaning that the relative order of elements that are not removed is unchanged.
- *
- *  \param first The beginning of the range of interest.
- *  \param last The end of the range of interest.
- *  \param stencil The beginning of the stencil sequence.
- *  \param pred A predicate to evaluate for each element of the range
- *              <tt>[stencil, stencil + (last - first))</tt>. Elements whose stencil value makes
- *              \p pred evaluate to \c true are removed from the sequence <tt>[first, last)</tt>.
- *  \return A ForwardIterator pointing to the end of the resulting range of
- *          elements for which \p pred of the corresponding stencil value evaluated to \c false.
- *
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/ForwardIterator.html">Forward Iterator</a>
- *          and \p ForwardIterator is mutable.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/Predicate.html">Predicate</a>.
- *
- *  \pre The range <tt>[first, last)</tt> shall not overlap the range
- *       <tt>[stencil, stencil + (last - first))</tt>.
- *
- *  The following code snippet demonstrates how to use \p remove_if to remove
- *  specific elements from an array of integers.
- *
- *  \code
- *  #include <thrust/remove.h>
- *  ...
- *  const int N = 6;
- *  int A[N] = {1, 4, 2, 8, 5, 7};
- *  int S[N] = {0, 1, 1, 1, 0, 0};
- *
- *  int *new_end = thrust::remove_if(A, A + N, S, thrust::identity<int>());
- *  // The first three values of A are now {1, 5, 7}
- *  // Values beyond new_end are unspecified
- *  \endcode
- *
- *  \note The range <tt>[first, last)</tt> is not permitted to overlap with the range <tt>[stencil, stencil + (last - first))</tt>.
- *
- *  \see http://www.sgi.com/tech/stl/remove_if.html
- *  \see remove
- *  \see remove_copy
- *  \see remove_copy_if
- */
-template<typename ForwardIterator,
-         typename InputIterator,
-         typename Predicate>
-  ForwardIterator remove_if(ForwardIterator first,
-                            ForwardIterator last,
-                            InputIterator stencil,
-                            Predicate pred);
-
-
-/*! \p remove_copy_if copies elements from the range <tt>[first,last)</tt> to a
- *  range beginning at \p result, except that elements for which \p pred of the 
- *  corresponding stencil value is \c true are not copied. The return value is 
- *  the end of the resulting range.  This operation is stable, meaning that the
- *  relative order of the elements that are copied is the same as the 
- *  range <tt>[first,last)</tt>.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the range of interest.
- *  \param last The end of the range of interest.
- *  \param stencil The beginning of the stencil sequence.
- *  \param result The resulting range is copied to the sequence beginning at this
- *                location.
- *  \param pred A predicate to evaluate for each element of the range
- *              <tt>[stencil, stencil + (last - first))</tt>. Elements of <tt>[first,last)</tt> whose
- *              stencil value makes \p pred evaluate to \c true are not copied to the resulting sequence.
- *  \return An OutputIterator pointing to the end of the resulting range.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator2's \c value_type is convertible to \p Predicate's \c argument_type.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *
- *  \pre The range <tt>[stencil, stencil + (last - first))</tt> shall not overlap the range <tt>[result, result + (last - first))</tt>.
- *
- *  The following code snippet demonstrates how to use \p remove_copy_if to copy
- *  a sequence of numbers to an output range while omitting specific elements using the \p thrust::host
- *  execution policy for parallelization.
- *
- *  \code
- *  #include <thrust/remove.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  const int N = 6;
- *  int V[N] = {-2, 0, -1, 0, 1, 2};
- *  int S[N] = { 1, 1,  0, 1, 0, 1};
- *  int result[2];
- *  thrust::remove_copy_if(thrust::host, V, V + N, S, result, thrust::identity<int>());
- *  // V remains {-2, 0, -1, 0, 1, 2}
- *  // result is now {-1, 1}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/remove_copy_if.html
- *  \see remove
- *  \see remove_copy
- *  \see remove_if
- *  \see copy_if
- */
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename Predicate>
-  OutputIterator remove_copy_if(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                InputIterator1 first,
-                                InputIterator1 last,
-                                InputIterator2 stencil,
-                                OutputIterator result,
-                                Predicate pred);
-
-
-/*! \p remove_copy_if copies elements from the range <tt>[first,last)</tt> to a
- *  range beginning at \p result, except that elements for which \p pred of the 
- *  corresponding stencil value is \c true are not copied. The return value is 
- *  the end of the resulting range.  This operation is stable, meaning that the
- *  relative order of the elements that are copied is the same as the 
- *  range <tt>[first,last)</tt>.
- *
- *  \param first The beginning of the range of interest.
- *  \param last The end of the range of interest.
- *  \param stencil The beginning of the stencil sequence.
- *  \param result The resulting range is copied to the sequence beginning at this
- *                location.
- *  \param pred A predicate to evaluate for each element of the range
- *              <tt>[stencil, stencil + (last - first))</tt>. Elements of <tt>[first,last)</tt> whose
- *              stencil value makes \p pred evaluate to \c true are not copied to the resulting sequence.
- *  \return An OutputIterator pointing to the end of the resulting range.
- *
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator2's \c value_type is convertible to \p Predicate's \c argument_type.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *
- *  \pre The range <tt>[stencil, stencil + (last - first))</tt> shall not overlap the range <tt>[result, result + (last - first))</tt>.
- *
- *  The following code snippet demonstrates how to use \p remove_copy_if to copy
- *  a sequence of numbers to an output range while omitting specific elements.
- *
- *  \code
- *  #include <thrust/remove.h>
- *  ...
- *  const int N = 6;
- *  int V[N] = {-2, 0, -1, 0, 1, 2};
- *  int S[N] = { 1, 1,  0, 1, 0, 1};
- *  int result[2];
- *  thrust::remove_copy_if(V, V + N, S, result, thrust::identity<int>());
- *  // V remains {-2, 0, -1, 0, 1, 2}
- *  // result is now {-1, 1}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/remove_copy_if.html
- *  \see remove
- *  \see remove_copy
- *  \see remove_if
- *  \see copy_if
- */
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename Predicate>
-  OutputIterator remove_copy_if(InputIterator1 first,
-                                InputIterator1 last,
-                                InputIterator2 stencil,
-                                OutputIterator result,
-                                Predicate pred);
-
-
-/*! \} // end stream_compaction
- */
-
-
-} // end thrust
-
-#include <thrust/detail/remove.inl>
-
diff --git a/compat/thrust/replace.h b/compat/thrust/replace.h
deleted file mode 100644
index 48e3e49e6c..0000000000
--- a/compat/thrust/replace.h
+++ /dev/null
@@ -1,817 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file replace.h
- *  \brief Functions for replacing elements in a range with a particular value
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/execution_policy.h>
-
-namespace thrust
-{
-
-
-/*! \addtogroup transformations
- *  \addtogroup replacing
- *  \ingroup transformations
- *  \{
- */
-
-
-/*! \p replace replaces every element in the range [first, last) equal to \p old_value
- *  with \p new_value. That is: for every iterator \c i, if <tt>*i == old_value</tt>
- *  then it performs the <tt>assignment *i = new_value</tt>.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the sequence of interest.
- *  \param last The end of the sequence of interest.
- *  \param old_value The value to replace.
- *  \param new_value The new value to replace \p old_value.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \p ForwardIterator is mutable.
- *  \tparam T is a model of <a href="http://www.sgi.com/tech/stl/Assignable.html">Assignable</a>,
- *          \p T is a model of <a href="http://www.sgi.com/tech/stl/EqualityComparable.html">EqualityComparable</a>,
- *          objects of \p T may be compared for equality with objects of
- *          \p ForwardIterator's \c value_type,
- *          and \p T is convertible to \p ForwardIterator's \c value_type.
- *
- *  The following code snippet demonstrates how to use \p replace to replace
- *  a value of interest in a \c device_vector with another using the \p thrust::device
- *  execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/replace.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/execution_policy.h>
- *
- *  ...
- *  
- *  thrust::device_vector<int> A(4);
- *  A[0] = 1;
- *  A[1] = 2;
- *  A[2] = 3;
- *  A[3] = 1;
- *
- *  thrust::replace(thrust::device, A.begin(), A.end(), 1, 99);
- *
- *  // A contains [99, 2, 3, 99]
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/replace.html
- *  \see \c replace_if
- *  \see \c replace_copy
- *  \see \c replace_copy_if
- */
-template<typename DerivedPolicy, typename ForwardIterator, typename T>
-  void replace(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-               ForwardIterator first, ForwardIterator last,
-               const T &old_value,
-               const T &new_value);
-
-
-/*! \p replace replaces every element in the range [first, last) equal to \p old_value
- *  with \p new_value. That is: for every iterator \c i, if <tt>*i == old_value</tt>
- *  then it performs the <tt>assignment *i = new_value</tt>.
- *
- *  \param first The beginning of the sequence of interest.
- *  \param last The end of the sequence of interest.
- *  \param old_value The value to replace.
- *  \param new_value The new value to replace \p old_value.
- *
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \p ForwardIterator is mutable.
- *  \tparam T is a model of <a href="http://www.sgi.com/tech/stl/Assignable.html">Assignable</a>,
- *          \p T is a model of <a href="http://www.sgi.com/tech/stl/EqualityComparable.html">EqualityComparable</a>,
- *          objects of \p T may be compared for equality with objects of
- *          \p ForwardIterator's \c value_type,
- *          and \p T is convertible to \p ForwardIterator's \c value_type.
- *
- *  The following code snippet demonstrates how to use \p replace to replace
- *  a value of interest in a \c device_vector with another.
- *
- *  \code
- *  #include <thrust/replace.h>
- *  #include <thrust/device_vector.h>
- *
- *  ...
- *  
- *  thrust::device_vector<int> A(4);
- *  A[0] = 1;
- *  A[1] = 2;
- *  A[2] = 3;
- *  A[3] = 1;
- *
- *  thrust::replace(A.begin(), A.end(), 1, 99);
- *
- *  // A contains [99, 2, 3, 99]
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/replace.html
- *  \see \c replace_if
- *  \see \c replace_copy
- *  \see \c replace_copy_if
- */
-template<typename ForwardIterator, typename T>
-  void replace(ForwardIterator first, ForwardIterator last, const T &old_value,
-               const T &new_value);
-
-
-/*! \p replace_if replaces every element in the range <tt>[first, last)</tt> for which
- *  \p pred returns \c true with \p new_value. That is: for every iterator \c i, if
- *  <tt>pred(*i)</tt> is \c true then it performs the assignment <tt>*i = new_value</tt>.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the sequence of interest.
- *  \param last The end of the sequence of interest.
- *  \param pred The predicate to test on every value of the range <tt>[first,last)</tt>.
- *  \param new_value The new value to replace elements which <tt>pred(*i)</tt> evaluates
- *         to \c true.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          \p ForwardIterator is mutable,
- *          and \p ForwardIterator's \c value_type is convertible to \p Predicate's \c argument_type.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *  \tparam T is a model of <a href="http://www.sgi.com/tech/stl/Assignable.html">Assignable</a>,
- *          and \p T is convertible to \p ForwardIterator's \c value_type.
- *
- *  The following code snippet demonstrates how to use \p replace_if to replace
- *  a \c device_vector's negative elements with \c 0 using the \p thrust::device execution policy
- *  for parallelization:
- *
- *  \code
- *  #include <thrust/replace.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  struct is_less_than_zero
- *  {
- *    __host__ __device__
- *    bool operator()(int x)
- *    {
- *      return x < 0;
- *    }
- *  };
- *
- *  ...
- *  
- *  thrust::device_vector<int> A(4);
- *  A[0] =  1;
- *  A[1] = -3;
- *  A[2] =  2;
- *  A[3] = -1;
- *
- *  is_less_than_zero pred;
- *
- *  thrust::replace_if(thrust::device, A.begin(), A.end(), pred, 0);
- *
- *  // A contains [1, 0, 2, 0]
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/replace_if.html
- *  \see \c replace
- *  \see \c replace_copy
- *  \see \c replace_copy_if
- */
-template<typename DerivedPolicy, typename ForwardIterator, typename Predicate, typename T>
-  void replace_if(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                  ForwardIterator first, ForwardIterator last,
-                  Predicate pred,
-                  const T &new_value);
-
-
-/*! \p replace_if replaces every element in the range <tt>[first, last)</tt> for which
- *  \p pred returns \c true with \p new_value. That is: for every iterator \c i, if
- *  <tt>pred(*i)</tt> is \c true then it performs the assignment <tt>*i = new_value</tt>.
- *
- *  \param first The beginning of the sequence of interest.
- *  \param last The end of the sequence of interest.
- *  \param pred The predicate to test on every value of the range <tt>[first,last)</tt>.
- *  \param new_value The new value to replace elements which <tt>pred(*i)</tt> evaluates
- *         to \c true.
- *
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          \p ForwardIterator is mutable,
- *          and \p ForwardIterator's \c value_type is convertible to \p Predicate's \c argument_type.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *  \tparam T is a model of <a href="http://www.sgi.com/tech/stl/Assignable.html">Assignable</a>,
- *          and \p T is convertible to \p ForwardIterator's \c value_type.
- *
- *  The following code snippet demonstrates how to use \p replace_if to replace
- *  a \c device_vector's negative elements with \c 0.
- *
- *  \code
- *  #include <thrust/replace.h>
- *  #include <thrust/device_vector.h>
- *  ...
- *  struct is_less_than_zero
- *  {
- *    __host__ __device__
- *    bool operator()(int x)
- *    {
- *      return x < 0;
- *    }
- *  };
- *
- *  ...
- *  
- *  thrust::device_vector<int> A(4);
- *  A[0] =  1;
- *  A[1] = -3;
- *  A[2] =  2;
- *  A[3] = -1;
- *
- *  is_less_than_zero pred;
- *
- *  thrust::replace_if(A.begin(), A.end(), pred, 0);
- *
- *  // A contains [1, 0, 2, 0]
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/replace_if.html
- *  \see \c replace
- *  \see \c replace_copy
- *  \see \c replace_copy_if
- */
-template<typename ForwardIterator, typename Predicate, typename T>
-  void replace_if(ForwardIterator first, ForwardIterator last,
-                  Predicate pred,
-                  const T &new_value);
-
-
-/*! \p replace_if replaces every element in the range <tt>[first, last)</tt> for which
- *  <tt>pred(*s)</tt> returns \c true with \p new_value. That is: for every iterator
- *  \c i in the range <tt>[first, last)</tt>, and \c s in the range <tt>[stencil, stencil + (last - first))</tt>,
- *  if <tt>pred(*s)</tt> is \c true then it performs the assignment <tt>*i = new_value</tt>.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the sequence of interest.
- *  \param last The end of the sequence of interest.
- *  \param stencil The beginning of the stencil sequence.
- *  \param pred The predicate to test on every value of the range <tt>[first,last)</tt>.
- *  \param new_value The new value to replace elements which <tt>pred(*i)</tt> evaluates
- *         to \c true.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \p ForwardIterator is mutable.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *  \tparam T is a model of <a href="http://www.sgi.com/tech/stl/Assignable.html">Assignable</a>,
- *          and \p T is convertible to \p ForwardIterator's \c value_type.
- *
- *  The following code snippet demonstrates how to use \p replace_if to replace
- *  a \c device_vector's element with \c 0 when its corresponding stencil element is less than zero
- *  using the \p thrust::device execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/replace.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/execution_policy.h>
- *
- *  struct is_less_than_zero
- *  {
- *    __host__ __device__
- *    bool operator()(int x)
- *    {
- *      return x < 0;
- *    }
- *  };
- *  
- *  ...
- *  
- *  thrust::device_vector<int> A(4);
- *  A[0] =  10;
- *  A[1] =  20;
- *  A[2] =  30;
- *  A[3] =  40;
- *
- *  thrust::device_vector<int> S(4);
- *  S[0] = -1;
- *  S[1] =  0;
- *  S[2] = -1;
- *  S[3] =  0;
- *
- *  is_less_than_zero pred;
- *  thrust::replace_if(thrust::device, A.begin(), A.end(), S.begin(), pred, 0);
- *
- *  // A contains [0, 20, 0, 40]
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/replace_if.html
- *  \see \c replace
- *  \see \c replace_copy
- *  \see \c replace_copy_if
- */
-template<typename DerivedPolicy, typename ForwardIterator, typename InputIterator, typename Predicate, typename T>
-  void replace_if(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                  ForwardIterator first, ForwardIterator last,
-                  InputIterator stencil,
-                  Predicate pred,
-                  const T &new_value);
-
-
-/*! \p replace_if replaces every element in the range <tt>[first, last)</tt> for which
- *  <tt>pred(*s)</tt> returns \c true with \p new_value. That is: for every iterator
- *  \c i in the range <tt>[first, last)</tt>, and \c s in the range <tt>[stencil, stencil + (last - first))</tt>,
- *  if <tt>pred(*s)</tt> is \c true then it performs the assignment <tt>*i = new_value</tt>.
- *
- *  \param first The beginning of the sequence of interest.
- *  \param last The end of the sequence of interest.
- *  \param stencil The beginning of the stencil sequence.
- *  \param pred The predicate to test on every value of the range <tt>[first,last)</tt>.
- *  \param new_value The new value to replace elements which <tt>pred(*i)</tt> evaluates
- *         to \c true.
- *
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \p ForwardIterator is mutable.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *  \tparam T is a model of <a href="http://www.sgi.com/tech/stl/Assignable.html">Assignable</a>,
- *          and \p T is convertible to \p ForwardIterator's \c value_type.
- *
- *  The following code snippet demonstrates how to use \p replace_if to replace
- *  a \c device_vector's element with \c 0 when its corresponding stencil element is less than zero.
- *
- *  \code
- *  #include <thrust/replace.h>
- *  #include <thrust/device_vector.h>
- *
- *  struct is_less_than_zero
- *  {
- *    __host__ __device__
- *    bool operator()(int x)
- *    {
- *      return x < 0;
- *    }
- *  };
- *  
- *  ...
- *  
- *  thrust::device_vector<int> A(4);
- *  A[0] =  10;
- *  A[1] =  20;
- *  A[2] =  30;
- *  A[3] =  40;
- *
- *  thrust::device_vector<int> S(4);
- *  S[0] = -1;
- *  S[1] =  0;
- *  S[2] = -1;
- *  S[3] =  0;
- *
- *  is_less_than_zero pred;
- *  thrust::replace_if(A.begin(), A.end(), S.begin(), pred, 0);
- *
- *  // A contains [0, 20, 0, 40]
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/replace_if.html
- *  \see \c replace
- *  \see \c replace_copy
- *  \see \c replace_copy_if
- */
-template<typename ForwardIterator, typename InputIterator, typename Predicate, typename T>
-  void replace_if(ForwardIterator first, ForwardIterator last,
-                  InputIterator stencil,
-                  Predicate pred,
-                  const T &new_value);
-
-
-/*! \p replace_copy copies elements from the range <tt>[first, last)</tt> to the range
- *  <tt>[result, result + (last-first))</tt>, except that any element equal to \p old_value
- *  is not copied; \p new_value is copied instead.
- *
- *  More precisely, for every integer \c n such that <tt>0 <= n < last-first</tt>, \p replace_copy
- *  performs the assignment <tt>*(result+n) = new_value</tt> if <tt>*(first+n) == old_value</tt>,
- *  and <tt>*(result+n) = *(first+n)</tt> otherwise.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the sequence to copy from.
- *  \param last The end of the sequence to copy from.
- *  \param result The beginning of the sequence to copy to.
- *  \param old_value The value to replace.
- *  \param new_value The replacement value for which <tt>*i == old_value</tt> evaluates to \c true.
- *  \return <tt>result + (last-first)</tt>
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam T is a model of <a href="http://www.sgi.com/tech/stl/Assignable.html">Assignable</a>,
- *          \p T is a model of <a href="http://www.sgi.com/tech/stl/EqualityComparable.html">Equality Comparable</a>,
- *          \p T may be compared for equality with \p InputIterator's \c value_type,
- *          and \p T is convertible to \p OutputIterator's \c value_type.
- *
- *  \pre \p first may equal \p result, but the ranges <tt>[first, last)</tt> and <tt>[result, result + (last - first))</tt> shall not overlap otherwise.
- *
- *  \code
- *  #include <thrust/replace.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  thrust::device_vector<int> A(4);
- *  A[0] = 1;
- *  A[1] = 2;
- *  A[2] = 3;
- *  A[3] = 1;
- *
- *  thrust::device_vector<int> B(4);
- *
- *  thrust::replace_copy(thrust::device, A.begin(), A.end(), B.begin(), 1, 99);
- *
- *  // B contains [99, 2, 3, 99]
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/replace_copy.html
- *  \see \c copy
- *  \see \c replace
- *  \see \c replace_if
- *  \see \c replace_copy_if
- */
-template<typename DerivedPolicy, typename InputIterator, typename OutputIterator, typename T>
-  OutputIterator replace_copy(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                              InputIterator first, InputIterator last,
-                              OutputIterator result,
-                              const T &old_value,
-                              const T &new_value);
-
-
-/*! \p replace_copy copies elements from the range <tt>[first, last)</tt> to the range
- *  <tt>[result, result + (last-first))</tt>, except that any element equal to \p old_value
- *  is not copied; \p new_value is copied instead.
- *
- *  More precisely, for every integer \c n such that <tt>0 <= n < last-first</tt>, \p replace_copy
- *  performs the assignment <tt>*(result+n) = new_value</tt> if <tt>*(first+n) == old_value</tt>,
- *  and <tt>*(result+n) = *(first+n)</tt> otherwise.
- *
- *  \param first The beginning of the sequence to copy from.
- *  \param last The end of the sequence to copy from.
- *  \param result The beginning of the sequence to copy to.
- *  \param old_value The value to replace.
- *  \param new_value The replacement value for which <tt>*i == old_value</tt> evaluates to \c true.
- *  \return <tt>result + (last-first)</tt>
- *
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam T is a model of <a href="http://www.sgi.com/tech/stl/Assignable.html">Assignable</a>,
- *          \p T is a model of <a href="http://www.sgi.com/tech/stl/EqualityComparable.html">Equality Comparable</a>,
- *          \p T may be compared for equality with \p InputIterator's \c value_type,
- *          and \p T is convertible to \p OutputIterator's \c value_type.
- *
- *  \pre \p first may equal \p result, but the ranges <tt>[first, last)</tt> and <tt>[result, result + (last - first))</tt> shall not overlap otherwise.
- *
- *  \code
- *  #include <thrust/replace.h>
- *  #include <thrust/device_vector.h>
- *  ...
- *  thrust::device_vector<int> A(4);
- *  A[0] = 1;
- *  A[1] = 2;
- *  A[2] = 3;
- *  A[3] = 1;
- *
- *  thrust::device_vector<int> B(4);
- *
- *  thrust::replace_copy(A.begin(), A.end(), B.begin(), 1, 99);
- *
- *  // B contains [99, 2, 3, 99]
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/replace_copy.html
- *  \see \c copy
- *  \see \c replace
- *  \see \c replace_if
- *  \see \c replace_copy_if
- */
-template<typename InputIterator, typename OutputIterator, typename T>
-  OutputIterator replace_copy(InputIterator first, InputIterator last,
-                              OutputIterator result, const T &old_value,
-                              const T &new_value);
-
-
-/*! \p replace_copy_if copies elements from the range <tt>[first, last)</tt> to the range
- *  <tt>[result, result + (last-first))</tt>, except that any element for which \p pred
- *  is \c true is not copied; \p new_value is copied instead.
- *
- *  More precisely, for every integer \c n such that 0 <= n < last-first,
- *  \p replace_copy_if performs the assignment <tt>*(result+n) = new_value</tt> if
- *  <tt>pred(*(first+n))</tt>, and <tt>*(result+n) = *(first+n)</tt> otherwise.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the sequence to copy from.
- *  \param last The end of the sequence to copy from.
- *  \param result The beginning of the sequence to copy to.
- *  \param pred The predicate to test on every value of the range <tt>[first,last)</tt>.
- *  \param new_value The replacement value to assign <tt>pred(*i)</tt> evaluates to \c true.
- *  \return <tt>result + (last-first)</tt>
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *  \tparam T is a model of <a href="http://www.sgi.com/tech/stl/Assignable.html">Assignable</a>,
- *          and \p T is convertible to \p OutputIterator's \c value_type.
- *
- *  \pre \p first may equal \p result, but the ranges <tt>[first, last)</tt> and <tt>[result, result + (last - first))</tt> shall not overlap otherwise.
- *
- *  \code
- *  #include <thrust/replace.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/execution_policy.h>
- *
- *  struct is_less_than_zero
- *  {
- *    __host__ __device__
- *    bool operator()(int x)
- *    {
- *      return x < 0;
- *    }
- *  };
- *
- *  ...
- *  
- *  thrust::device_vector<int> A(4);
- *  A[0] =  1;
- *  A[1] = -3;
- *  A[2] =  2;
- *  A[3] = -1;
- 
- *  thrust::device_vector<int> B(4);
- *  is_less_than_zero pred;
- *
- *  thrust::replace_copy_if(thrust::device, A.begin(), A.end(), B.begin(), pred, 0);
- *
- *  // B contains [1, 0, 2, 0]
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/replace_copy_if.html
- *  \see \c replace
- *  \see \c replace_if
- *  \see \c replace_copy
- */
-template<typename DerivedPolicy, typename InputIterator, typename OutputIterator, typename Predicate, typename T>
-  OutputIterator replace_copy_if(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                 InputIterator first, InputIterator last,
-                                 OutputIterator result,
-                                 Predicate pred,
-                                 const T &new_value);
-
-
-/*! \p replace_copy_if copies elements from the range <tt>[first, last)</tt> to the range
- *  <tt>[result, result + (last-first))</tt>, except that any element for which \p pred
- *  is \c true is not copied; \p new_value is copied instead.
- *
- *  More precisely, for every integer \c n such that 0 <= n < last-first,
- *  \p replace_copy_if performs the assignment <tt>*(result+n) = new_value</tt> if
- *  <tt>pred(*(first+n))</tt>, and <tt>*(result+n) = *(first+n)</tt> otherwise.
- *
- *  \param first The beginning of the sequence to copy from.
- *  \param last The end of the sequence to copy from.
- *  \param result The beginning of the sequence to copy to.
- *  \param pred The predicate to test on every value of the range <tt>[first,last)</tt>.
- *  \param new_value The replacement value to assign <tt>pred(*i)</tt> evaluates to \c true.
- *  \return <tt>result + (last-first)</tt>
- *
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *  \tparam T is a model of <a href="http://www.sgi.com/tech/stl/Assignable.html">Assignable</a>,
- *          and \p T is convertible to \p OutputIterator's \c value_type.
- *
- *  \pre \p first may equal \p result, but the ranges <tt>[first, last)</tt> and <tt>[result, result + (last - first))</tt> shall not overlap otherwise.
- *
- *  \code
- *  #include <thrust/replace.h>
- *  #include <thrust/device_vector.h>
- *
- *  struct is_less_than_zero
- *  {
- *    __host__ __device__
- *    bool operator()(int x)
- *    {
- *      return x < 0;
- *    }
- *  };
- *
- *  ...
- *  
- *  thrust::device_vector<int> A(4);
- *  A[0] =  1;
- *  A[1] = -3;
- *  A[2] =  2;
- *  A[3] = -1;
- 
- *  thrust::device_vector<int> B(4);
- *  is_less_than_zero pred;
- *
- *  thrust::replace_copy_if(A.begin(), A.end(), B.begin(), pred, 0);
- *
- *  // B contains [1, 0, 2, 0]
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/replace_copy_if.html
- *  \see \c replace
- *  \see \c replace_if
- *  \see \c replace_copy
- */
-template<typename InputIterator, typename OutputIterator, typename Predicate, typename T>
-  OutputIterator replace_copy_if(InputIterator first, InputIterator last,
-                                 OutputIterator result,
-                                 Predicate pred,
-                                 const T &new_value);
-
-
-/*! This version of \p replace_copy_if copies elements from the range <tt>[first, last)</tt> to the range
- *  <tt>[result, result + (last-first))</tt>, except that any element whose corresponding stencil
- *  element causes \p pred to be \c true is not copied; \p new_value is copied instead.
- *
- *  More precisely, for every integer \c n such that <tt>0 <= n < last-first</tt>,
- *  \p replace_copy_if performs the assignment <tt>*(result+n) = new_value</tt> if
- *  <tt>pred(*(stencil+n))</tt>, and <tt>*(result+n) = *(first+n)</tt> otherwise.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the sequence to copy from.
- *  \param last The end of the sequence to copy from.
- *  \param stencil The beginning of the stencil sequence.
- *  \param result The beginning of the sequence to copy to.
- *  \param pred The predicate to test on every value of the range <tt>[stencil, stencil + (last - first))</tt>.
- *  \param new_value The replacement value to assign when <tt>pred(*s)</tt> evaluates to \c true. 
- *  \return <tt>result + (last-first)</tt>
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *                         and \p InputIterator2's \c value_type is convertible to \p Predicate's \c argument_type.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *  \tparam T is a model of <a href="http://www.sgi.com/tech/stl/Assignable.html">Assignable</a>,
- *          and \p T is convertible to \p OutputIterator's \c value_type.
- *
- *  \pre \p first may equal \p result, but the ranges <tt>[first, last)</tt> and <tt>[result, result + (last - first))</tt> shall not overlap otherwise.
- *  \pre \p stencil may equal \p result, but the ranges <tt>[stencil, stencil + (last - first))</tt> and <tt>[result, result + (last - first))</tt> shall not overlap otherwise.
- *
- *  \code
- *  #include <thrust/replace.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/execution_policy.h>
- *
- *  struct is_less_than_zero
- *  {
- *    __host__ __device__
- *    bool operator()(int x)
- *    {
- *      return x < 0;
- *    }
- *  };
- *  
- *  ...
- *  
- *  thrust::device_vector<int> A(4);
- *  A[0] =  10;
- *  A[1] =  20;
- *  A[2] =  30;
- *  A[3] =  40;
- *
- *  thrust::device_vector<int> S(4);
- *  S[0] = -1;
- *  S[1] =  0;
- *  S[2] = -1;
- *  S[3] =  0;
- *
- *  thrust::device_vector<int> B(4);
- *  is_less_than_zero pred;
- *
- *  thrust::replace_if(thrust::device, A.begin(), A.end(), S.begin(), B.begin(), pred, 0);
- *
- *  // B contains [0, 20, 0, 40]
- *  \endcode
- *
- *  \see \c replace_copy
- *  \see \c replace_if
- */
-template<typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename OutputIterator, typename Predicate, typename T>
-  OutputIterator replace_copy_if(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                 InputIterator1 first, InputIterator1 last,
-                                 InputIterator2 stencil,
-                                 OutputIterator result,
-                                 Predicate pred,
-                                 const T &new_value);
-
-
-/*! This version of \p replace_copy_if copies elements from the range <tt>[first, last)</tt> to the range
- *  <tt>[result, result + (last-first))</tt>, except that any element whose corresponding stencil
- *  element causes \p pred to be \c true is not copied; \p new_value is copied instead.
- *
- *  More precisely, for every integer \c n such that <tt>0 <= n < last-first</tt>,
- *  \p replace_copy_if performs the assignment <tt>*(result+n) = new_value</tt> if
- *  <tt>pred(*(stencil+n))</tt>, and <tt>*(result+n) = *(first+n)</tt> otherwise.
- *
- *  \param first The beginning of the sequence to copy from.
- *  \param last The end of the sequence to copy from.
- *  \param stencil The beginning of the stencil sequence.
- *  \param result The beginning of the sequence to copy to.
- *  \param pred The predicate to test on every value of the range <tt>[stencil, stencil + (last - first))</tt>.
- *  \param new_value The replacement value to assign when <tt>pred(*s)</tt> evaluates to \c true. 
- *  \return <tt>result + (last-first)</tt>
- *
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *                         and \p InputIterator2's \c value_type is convertible to \p Predicate's \c argument_type.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *  \tparam T is a model of <a href="http://www.sgi.com/tech/stl/Assignable.html">Assignable</a>,
- *          and \p T is convertible to \p OutputIterator's \c value_type.
- *
- *  \pre \p first may equal \p result, but the ranges <tt>[first, last)</tt> and <tt>[result, result + (last - first))</tt> shall not overlap otherwise.
- *  \pre \p stencil may equal \p result, but the ranges <tt>[stencil, stencil + (last - first))</tt> and <tt>[result, result + (last - first))</tt> shall not overlap otherwise.
- *
- *  \code
- *  #include <thrust/replace.h>
- *  #include <thrust/device_vector.h>
- *
- *  struct is_less_than_zero
- *  {
- *    __host__ __device__
- *    bool operator()(int x)
- *    {
- *      return x < 0;
- *    }
- *  };
- *  
- *  ...
- *  
- *  thrust::device_vector<int> A(4);
- *  A[0] =  10;
- *  A[1] =  20;
- *  A[2] =  30;
- *  A[3] =  40;
- *
- *  thrust::device_vector<int> S(4);
- *  S[0] = -1;
- *  S[1] =  0;
- *  S[2] = -1;
- *  S[3] =  0;
- *
- *  thrust::device_vector<int> B(4);
- *  is_less_than_zero pred;
- *
- *  thrust::replace_if(A.begin(), A.end(), S.begin(), B.begin(), pred, 0);
- *
- *  // B contains [0, 20, 0, 40]
- *  \endcode
- *
- *  \see \c replace_copy
- *  \see \c replace_if
- */
-template<typename InputIterator1, typename InputIterator2, typename OutputIterator, typename Predicate, typename T>
-  OutputIterator replace_copy_if(InputIterator1 first, InputIterator1 last,
-                                 InputIterator2 stencil,
-                                 OutputIterator result,
-                                 Predicate pred,
-                                 const T &new_value);
-
-
-/*! \} // end replacing
- *  \} // transformations
- */
-
-
-} // end thrust
-
-#include <thrust/detail/replace.inl>
-
diff --git a/compat/thrust/reverse.h b/compat/thrust/reverse.h
deleted file mode 100644
index ba50c5d05a..0000000000
--- a/compat/thrust/reverse.h
+++ /dev/null
@@ -1,213 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file reverse.h
- *  \brief Reverses the order of a range
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/execution_policy.h>
-
-namespace thrust
-{
-
-
-/*! \addtogroup reordering
- *  \ingroup algorithms
- */
-
-
-/*! \p reverse reverses a range. That is: for every <tt>i</tt> such that
- *  <tt>0 <= i <= (last - first) / 2</tt>, it exchanges <tt>*(first + i)</tt>
- *  and <tt>*(last - (i + 1))</tt>.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the range to reverse.
- *  \param last The end of the range to reverse.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam BidirectionalIterator is a model of <a href="http://www.sgi.com/tech/stl/BidirectionalIterator.html">Bidirectional Iterator</a> and
- *          \p BidirectionalIterator is mutable.
- *
- *  The following code snippet demonstrates how to use \p reverse to reverse a
- *  \p device_vector of integers using the \p thrust::device execution policy for
- *  parallelization:
- *
- *  \code
- *  #include <thrust/reverse.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  const int N = 6;
- *  int data[N] = {0, 1, 2, 3, 4, 5};
- *  thrust::device_vector<int> v(data, data + N);
- *  thrust::reverse(thrust::device, v.begin(), v.end());
- *  // v is now {5, 4, 3, 2, 1, 0}
- *  \endcode
- *  
- *  \see http://www.sgi.com/tech/stl/reverse.html
- *  \see \p reverse_copy
- *  \see \p reverse_iterator
- */
-template<typename DerivedPolicy, typename BidirectionalIterator>
-  void reverse(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-               BidirectionalIterator first,
-               BidirectionalIterator last);
-
-
-/*! \p reverse reverses a range. That is: for every <tt>i</tt> such that
- *  <tt>0 <= i <= (last - first) / 2</tt>, it exchanges <tt>*(first + i)</tt>
- *  and <tt>*(last - (i + 1))</tt>.
- *
- *  \param first The beginning of the range to reverse.
- *  \param last The end of the range to reverse.
- *
- *  \tparam BidirectionalIterator is a model of <a href="http://www.sgi.com/tech/stl/BidirectionalIterator.html">Bidirectional Iterator</a> and
- *          \p BidirectionalIterator is mutable.
- *
- *  The following code snippet demonstrates how to use \p reverse to reverse a
- *  \p device_vector of integers.
- *
- *  \code
- *  #include <thrust/reverse.h>
- *  ...
- *  const int N = 6;
- *  int data[N] = {0, 1, 2, 3, 4, 5};
- *  thrust::device_vector<int> v(data, data + N);
- *  thrust::reverse(v.begin(), v.end());
- *  // v is now {5, 4, 3, 2, 1, 0}
- *  \endcode
- *  
- *  \see http://www.sgi.com/tech/stl/reverse.html
- *  \see \p reverse_copy
- *  \see \p reverse_iterator
- */
-template<typename BidirectionalIterator>
-  void reverse(BidirectionalIterator first,
-               BidirectionalIterator last);
-
-
-/*! \p reverse_copy differs from \ref reverse only in that the reversed range
- *  is written to a different output range, rather than inplace.
- *
- *  \p reverse_copy copies elements from the range <tt>[first, last)</tt> to the
- *  range <tt>[result, result + (last - first))</tt> such that the copy is a 
- *  reverse of the original range. Specifically: for every <tt>i</tt> such that
- *  <tt>0 <= i < (last - first)</tt>, \p reverse_copy performs the assignment
- *  <tt>*(result + (last - first) - i) = *(first + i)</tt>.
- *
- *  The return value is <tt>result + (last - first))</tt>.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the range to reverse.
- *  \param last The end of the range to reverse.
- *  \param result The beginning of the output range.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam BidirectionalIterator is a model of <a href="http://www.sgi.com/tech/stl/BidirectionalIterator.html">Bidirectional Iterator</a>,
- *          and \p BidirectionalIterator's \p value_type is convertible to \p OutputIterator's \p value_type.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *
- *  \pre The range <tt>[first, last)</tt> and the range <tt>[result, result + (last - first))</tt> shall not overlap.
- *
- *  The following code snippet demonstrates how to use \p reverse_copy to reverse
- *  an input \p device_vector of integers to an output \p device_vector using the \p thrust::device
- *  execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/reverse.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  const int N = 6;
- *  int data[N] = {0, 1, 2, 3, 4, 5};
- *  thrust::device_vector<int> input(data, data + N);
- *  thrust::device_vector<int> output(N);
- *  thrust::reverse_copy(thrust::device, v.begin(), v.end(), output.begin());
- *  // input is still {0, 1, 2, 3, 4, 5}
- *  // output is now  {5, 4, 3, 2, 1, 0}
- *  \endcode
- *  
- *  \see http://www.sgi.com/tech/stl/reverse_copy.html
- *  \see \p reverse
- *  \see \p reverse_iterator
- */
-template<typename DerivedPolicy, typename BidirectionalIterator, typename OutputIterator>
-  OutputIterator reverse_copy(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                              BidirectionalIterator first,
-                              BidirectionalIterator last,
-                              OutputIterator result);
-
-
-/*! \p reverse_copy differs from \ref reverse only in that the reversed range
- *  is written to a different output range, rather than inplace.
- *
- *  \p reverse_copy copies elements from the range <tt>[first, last)</tt> to the
- *  range <tt>[result, result + (last - first))</tt> such that the copy is a 
- *  reverse of the original range. Specifically: for every <tt>i</tt> such that
- *  <tt>0 <= i < (last - first)</tt>, \p reverse_copy performs the assignment
- *  <tt>*(result + (last - first) - i) = *(first + i)</tt>.
- *
- *  The return value is <tt>result + (last - first))</tt>.
- *
- *  \param first The beginning of the range to reverse.
- *  \param last The end of the range to reverse.
- *  \param result The beginning of the output range.
- *
- *  \tparam BidirectionalIterator is a model of <a href="http://www.sgi.com/tech/stl/BidirectionalIterator.html">Bidirectional Iterator</a>,
- *          and \p BidirectionalIterator's \p value_type is convertible to \p OutputIterator's \p value_type.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *
- *  \pre The range <tt>[first, last)</tt> and the range <tt>[result, result + (last - first))</tt> shall not overlap.
- *
- *  The following code snippet demonstrates how to use \p reverse_copy to reverse
- *  an input \p device_vector of integers to an output \p device_vector.
- *
- *  \code
- *  #include <thrust/reverse.h>
- *  ...
- *  const int N = 6;
- *  int data[N] = {0, 1, 2, 3, 4, 5};
- *  thrust::device_vector<int> input(data, data + N);
- *  thrust::device_vector<int> output(N);
- *  thrust::reverse_copy(v.begin(), v.end(), output.begin());
- *  // input is still {0, 1, 2, 3, 4, 5}
- *  // output is now  {5, 4, 3, 2, 1, 0}
- *  \endcode
- *  
- *  \see http://www.sgi.com/tech/stl/reverse_copy.html
- *  \see \p reverse
- *  \see \p reverse_iterator
- */
-template<typename BidirectionalIterator, typename OutputIterator>
-  OutputIterator reverse_copy(BidirectionalIterator first,
-                              BidirectionalIterator last,
-                              OutputIterator result);
-
-
-/*! \} // end reordering
- */
-
-
-} // end thrust
-
-#include <thrust/detail/reverse.inl>
-
diff --git a/compat/thrust/scan.h b/compat/thrust/scan.h
deleted file mode 100644
index 95074e6b90..0000000000
--- a/compat/thrust/scan.h
+++ /dev/null
@@ -1,1552 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file scan.h
- *  \brief Functions for computing prefix sums
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/execution_policy.h>
-
-namespace thrust
-{
-
-
-/*! \addtogroup algorithms
- */
-
-
-/*! \addtogroup prefixsums Prefix Sums
- *  \ingroup algorithms
- *  \{
- */
-
-
-/*! \p inclusive_scan computes an inclusive prefix sum operation. The
- *  term 'inclusive' means that each result includes the corresponding
- *  input operand in the partial sum. More precisely, <tt>*first</tt> is 
- *  assigned to <tt>*result</tt> and the sum of <tt>*first</tt> and 
- *  <tt>*(first + 1)</tt> is assigned to <tt>*(result + 1)</tt>, and so on. 
- *  This version of \p inclusive_scan assumes plus as the associative operator.  
- *  When the input and output sequences are the same, the scan is performed 
- *  in-place.
- 
- *  \p inclusive_scan is similar to \c std::partial_sum in the STL.  The primary
- *  difference between the two functions is that \c std::partial_sum guarantees
- *  a serial summation order, while \p inclusive_scan requires associativity of 
- *  the binary operation to parallelize the prefix sum.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *    
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the input sequence.
- *  \param last The end of the input sequence.
- *  \param result The beginning of the output sequence.
- *  \return The end of the output sequence.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *                        and \c InputIterator's \c value_type is convertible to
- *                        \c OutputIterator's \c value_type.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>,
- *                         and if \c x and \c y are objects of \c OutputIterator's
- *                         \c value_type, then <tt>x + y</tt> is defined. If \c T is
- *                         \c OutputIterator's \c value_type, then <tt>T(0)</tt> is
- *                         defined.
- *
- *  \pre \p first may equal \p result but the range <tt>[first, last)</tt> and the range <tt>[result, result + (last - first))</tt> shall not overlap otherwise.
- *
- *  The following code snippet demonstrates how to use \p inclusive_scan to compute an in-place
- *  prefix sum using the \p thrust::host execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/scan.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  
- *  int data[6] = {1, 0, 2, 2, 1, 3};
- *
- *  thrust::inclusive_scan(thrust::host, data, data + 6, data); // in-place scan
- *
- *  // data is now {1, 1, 3, 5, 6, 9}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/partial_sum.html
- *
- */
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator>
-  OutputIterator inclusive_scan(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                InputIterator first,
-                                InputIterator last,
-                                OutputIterator result);
-
-
-/*! \p inclusive_scan computes an inclusive prefix sum operation. The
- *  term 'inclusive' means that each result includes the corresponding
- *  input operand in the partial sum. More precisely, <tt>*first</tt> is 
- *  assigned to <tt>*result</tt> and the sum of <tt>*first</tt> and 
- *  <tt>*(first + 1)</tt> is assigned to <tt>*(result + 1)</tt>, and so on. 
- *  This version of \p inclusive_scan assumes plus as the associative operator.  
- *  When the input and output sequences are the same, the scan is performed 
- *  in-place.
- 
- *  \p inclusive_scan is similar to \c std::partial_sum in the STL.  The primary
- *  difference between the two functions is that \c std::partial_sum guarantees
- *  a serial summation order, while \p inclusive_scan requires associativity of 
- *  the binary operation to parallelize the prefix sum.
- *    
- *  \param first The beginning of the input sequence.
- *  \param last The end of the input sequence.
- *  \param result The beginning of the output sequence.
- *  \return The end of the output sequence.
- *
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *                        and \c InputIterator's \c value_type is convertible to
- *                        \c OutputIterator's \c value_type.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>,
- *                         and if \c x and \c y are objects of \c OutputIterator's
- *                         \c value_type, then <tt>x + y</tt> is defined. If \c T is
- *                         \c OutputIterator's \c value_type, then <tt>T(0)</tt> is
- *                         defined.
- *
- *  \pre \p first may equal \p result but the range <tt>[first, last)</tt> and the range <tt>[result, result + (last - first))</tt> shall not overlap otherwise.
- *
- *  The following code snippet demonstrates how to use \p inclusive_scan
- *
- *  \code
- *  #include <thrust/scan.h>
- *  
- *  int data[6] = {1, 0, 2, 2, 1, 3};
- *
- *  thrust::inclusive_scan(data, data + 6, data); // in-place scan
- *
- *  // data is now {1, 1, 3, 5, 6, 9}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/partial_sum.html
- *
- */
-template<typename InputIterator,
-         typename OutputIterator>
-  OutputIterator inclusive_scan(InputIterator first,
-                                InputIterator last,
-                                OutputIterator result);
-
-
-/*! \p inclusive_scan computes an inclusive prefix sum operation. The
- *  term 'inclusive' means that each result includes the corresponding
- *  input operand in the partial sum.  When the input and output sequences 
- *  are the same, the scan is performed in-place.
- *    
- *  \p inclusive_scan is similar to \c std::partial_sum in the STL.  The primary
- *  difference between the two functions is that \c std::partial_sum guarantees
- *  a serial summation order, while \p inclusive_scan requires associativity of 
- *  the binary operation to parallelize the prefix sum.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the input sequence.
- *  \param last The end of the input sequence.
- *  \param result The beginning of the output sequence.
- *  \param binary_op The associative operator used to 'sum' values.
- *  \return The end of the output sequence.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *                        and \c InputIterator's \c value_type is convertible to
- *                        \c OutputIterator's \c value_type.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>
- *                         and \c OutputIterator's \c value_type is convertible to
- *                         both \c AssociativeOperator's \c first_argument_type and
- *                         \c second_argument_type.
- *  \tparam AssociativeOperator is a model of <a href="http://www.sgi.com/tech/stl/BinaryFunction.html">Binary Function</a>
- *                              and \c AssociativeOperator's \c result_type is
- *                              convertible to \c OutputIterator's \c value_type.
- *
- *  \pre \p first may equal \p result but the range <tt>[first, last)</tt> and the range <tt>[result, result + (last - first))</tt> shall not overlap otherwise.
- *
- *  The following code snippet demonstrates how to use \p inclusive_scan to compute an in-place
- *  prefix sum using the \p thrust::host execution policy for parallelization:
- *
- *  \code
- *  int data[10] = {-5, 0, 2, -3, 2, 4, 0, -1, 2, 8};
- * 
- *  thrust::maximum<int> binary_op;
- *
- *  thrust::inclusive_scan(thrust::host, data, data + 10, data, binary_op); // in-place scan
- *
- *  // data is now {-5, 0, 2, 2, 2, 4, 4, 4, 4, 8}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/partial_sum.html
- */
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename AssociativeOperator>
-  OutputIterator inclusive_scan(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                InputIterator first,
-                                InputIterator last,
-                                OutputIterator result,
-                                AssociativeOperator binary_op);
-
-
-/*! \p inclusive_scan computes an inclusive prefix sum operation. The
- *  term 'inclusive' means that each result includes the corresponding
- *  input operand in the partial sum.  When the input and output sequences 
- *  are the same, the scan is performed in-place.
- *    
- *  \p inclusive_scan is similar to \c std::partial_sum in the STL.  The primary
- *  difference between the two functions is that \c std::partial_sum guarantees
- *  a serial summation order, while \p inclusive_scan requires associativity of 
- *  the binary operation to parallelize the prefix sum.
- *
- *  \param first The beginning of the input sequence.
- *  \param last The end of the input sequence.
- *  \param result The beginning of the output sequence.
- *  \param binary_op The associative operator used to 'sum' values.
- *  \return The end of the output sequence.
- *
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *                        and \c InputIterator's \c value_type is convertible to
- *                        \c OutputIterator's \c value_type.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>
- *                         and \c OutputIterator's \c value_type is convertible to
- *                         both \c AssociativeOperator's \c first_argument_type and
- *                         \c second_argument_type.
- *  \tparam AssociativeOperator is a model of <a href="http://www.sgi.com/tech/stl/BinaryFunction.html">Binary Function</a>
- *                              and \c AssociativeOperator's \c result_type is
- *                              convertible to \c OutputIterator's \c value_type.
- *
- *  \pre \p first may equal \p result but the range <tt>[first, last)</tt> and the range <tt>[result, result + (last - first))</tt> shall not overlap otherwise.
- *
- *  The following code snippet demonstrates how to use \p inclusive_scan
- *
- *  \code
- *  int data[10] = {-5, 0, 2, -3, 2, 4, 0, -1, 2, 8};
- * 
- *  thrust::maximum<int> binary_op;
- *
- *  thrust::inclusive_scan(data, data + 10, data, binary_op); // in-place scan
- *
- *  // data is now {-5, 0, 2, 2, 2, 4, 4, 4, 4, 8}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/partial_sum.html
- */
-template<typename InputIterator,
-         typename OutputIterator,
-         typename AssociativeOperator>
-  OutputIterator inclusive_scan(InputIterator first,
-                                InputIterator last,
-                                OutputIterator result,
-                                AssociativeOperator binary_op);
-
-
-/*! \p exclusive_scan computes an exclusive prefix sum operation. The
- *  term 'exclusive' means that each result does not include the 
- *  corresponding input operand in the partial sum.  More precisely,
- *  <tt>0</tt> is assigned to <tt>*result</tt> and the sum of 
- *  <tt>0</tt> and <tt>*first</tt> is assigned to <tt>*(result + 1)</tt>,
- *  and so on. This version of \p exclusive_scan assumes plus as the 
- *  associative operator and \c 0 as the initial value.  When the input and 
- *  output sequences are the same, the scan is performed in-place.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *    
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the input sequence.
- *  \param last The end of the input sequence.
- *  \param result The beginning of the output sequence.
- *  \return The end of the output sequence.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *                        and \c InputIterator's \c value_type is convertible to
- *                        \c OutputIterator's \c value_type.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>,
- *                         and if \c x and \c y are objects of \c OutputIterator's
- *                         \c value_type, then <tt>x + y</tt> is defined. If \c T is
- *                         \c OutputIterator's \c value_type, then <tt>T(0)</tt> is
- *                         defined.
- *
- *  \pre \p first may equal \p result but the range <tt>[first, last)</tt> and the range <tt>[result, result + (last - first))</tt> shall not overlap otherwise.
- *
- *  The following code snippet demonstrates how to use \p exclusive_scan to compute an in-place
- *  prefix sum using the \p thrust::host execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/scan.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  
- *  int data[6] = {1, 0, 2, 2, 1, 3};
- *
- *  thrust::exclusive_scan(thrust::host, data, data + 6, data); // in-place scan
- *
- *  // data is now {0, 1, 1, 3, 5, 6}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/partial_sum.html
- */
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator>
-  OutputIterator exclusive_scan(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                InputIterator first,
-                                InputIterator last,
-                                OutputIterator result);
-
-
-/*! \p exclusive_scan computes an exclusive prefix sum operation. The
- *  term 'exclusive' means that each result does not include the 
- *  corresponding input operand in the partial sum.  More precisely,
- *  <tt>0</tt> is assigned to <tt>*result</tt> and the sum of 
- *  <tt>0</tt> and <tt>*first</tt> is assigned to <tt>*(result + 1)</tt>,
- *  and so on. This version of \p exclusive_scan assumes plus as the 
- *  associative operator and \c 0 as the initial value.  When the input and 
- *  output sequences are the same, the scan is performed in-place.
- *    
- *  \param first The beginning of the input sequence.
- *  \param last The end of the input sequence.
- *  \param result The beginning of the output sequence.
- *  \return The end of the output sequence.
- *
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *                        and \c InputIterator's \c value_type is convertible to
- *                        \c OutputIterator's \c value_type.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>,
- *                         and if \c x and \c y are objects of \c OutputIterator's
- *                         \c value_type, then <tt>x + y</tt> is defined. If \c T is
- *                         \c OutputIterator's \c value_type, then <tt>T(0)</tt> is
- *                         defined.
- *
- *  \pre \p first may equal \p result but the range <tt>[first, last)</tt> and the range <tt>[result, result + (last - first))</tt> shall not overlap otherwise.
- *
- *  The following code snippet demonstrates how to use \p exclusive_scan
- *
- *  \code
- *  #include <thrust/scan.h>
- *  
- *  int data[6] = {1, 0, 2, 2, 1, 3};
- *
- *  thrust::exclusive_scan(data, data + 6, data); // in-place scan
- *
- *  // data is now {0, 1, 1, 3, 5, 6}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/partial_sum.html
- */
-template<typename InputIterator,
-         typename OutputIterator>
-  OutputIterator exclusive_scan(InputIterator first,
-                                InputIterator last,
-                                OutputIterator result);
-
-
-/*! \p exclusive_scan computes an exclusive prefix sum operation. The
- *  term 'exclusive' means that each result does not include the 
- *  corresponding input operand in the partial sum.  More precisely,
- *  \p init is assigned to <tt>*result</tt> and the sum of \p init and 
- *  <tt>*first</tt> is assigned to <tt>*(result + 1)</tt>, and so on. 
- *  This version of \p exclusive_scan assumes plus as the associative 
- *  operator but requires an initial value \p init.  When the input and 
- *  output sequences are the same, the scan is performed in-place.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the input sequence.
- *  \param last The end of the input sequence.
- *  \param result The beginning of the output sequence.
- *  \param init The initial value.
- *  \return The end of the output sequence.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *                        and \c InputIterator's \c value_type is convertible to
- *                        \c OutputIterator's \c value_type.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>,
- *                         and if \c x and \c y are objects of \c OutputIterator's
- *                         \c value_type, then <tt>x + y</tt> is defined.
- *  \tparam T is convertible to \c OutputIterator's \c value_type.
- *
- *  \pre \p first may equal \p result but the range <tt>[first, last)</tt> and the range <tt>[result, result + (last - first))</tt> shall not overlap otherwise.
- *
- *  The following code snippet demonstrates how to use \p exclusive_scan to compute an in-place
- *  prefix sum using the \p thrust::host execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/scan.h>
- *  #include <thrust/execution_policy.h>
- *  
- *  int data[6] = {1, 0, 2, 2, 1, 3};
- *
- *  thrust::exclusive_scan(thrust::host, data, data + 6, data, 4); // in-place scan
- *
- *  // data is now {4, 5, 5, 7, 9, 10}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/partial_sum.html
- */
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename T>
-  OutputIterator exclusive_scan(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                InputIterator first,
-                                InputIterator last,
-                                OutputIterator result,
-                                T init);
-
-
-/*! \p exclusive_scan computes an exclusive prefix sum operation. The
- *  term 'exclusive' means that each result does not include the 
- *  corresponding input operand in the partial sum.  More precisely,
- *  \p init is assigned to <tt>*result</tt> and the sum of \p init and 
- *  <tt>*first</tt> is assigned to <tt>*(result + 1)</tt>, and so on. 
- *  This version of \p exclusive_scan assumes plus as the associative 
- *  operator but requires an initial value \p init.  When the input and 
- *  output sequences are the same, the scan is performed in-place.
- *
- *  \param first The beginning of the input sequence.
- *  \param last The end of the input sequence.
- *  \param result The beginning of the output sequence.
- *  \param init The initial value.
- *  \return The end of the output sequence.
- *
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *                        and \c InputIterator's \c value_type is convertible to
- *                        \c OutputIterator's \c value_type.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>,
- *                         and if \c x and \c y are objects of \c OutputIterator's
- *                         \c value_type, then <tt>x + y</tt> is defined.
- *  \tparam T is convertible to \c OutputIterator's \c value_type.
- *
- *  \pre \p first may equal \p result but the range <tt>[first, last)</tt> and the range <tt>[result, result + (last - first))</tt> shall not overlap otherwise.
- *
- *  The following code snippet demonstrates how to use \p exclusive_scan
- *
- *  \code
- *  #include <thrust/scan.h>
- *  
- *  int data[6] = {1, 0, 2, 2, 1, 3};
- *
- *  thrust::exclusive_scan(data, data + 6, data, 4); // in-place scan
- *
- *  // data is now {4, 5, 5, 7, 9, 10}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/partial_sum.html
- */
-template<typename InputIterator,
-         typename OutputIterator,
-         typename T>
-  OutputIterator exclusive_scan(InputIterator first,
-                                InputIterator last,
-                                OutputIterator result,
-                                T init);
-
-
-/*! \p exclusive_scan computes an exclusive prefix sum operation. The
- *  term 'exclusive' means that each result does not include the 
- *  corresponding input operand in the partial sum.  More precisely,
- *  \p init is assigned to <tt>\*result</tt> and the value
- *  <tt>binary_op(init, \*first)</tt> is assigned to <tt>\*(result + 1)</tt>,
- *  and so on. This version of the function requires both an associative 
- *  operator and an initial value \p init.  When the input and output
- *  sequences are the same, the scan is performed in-place.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *    
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the input sequence.
- *  \param last The end of the input sequence.
- *  \param result The beginning of the output sequence.
- *  \param init The initial value.
- *  \param binary_op The associative operator used to 'sum' values.
- *  \return The end of the output sequence.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *                        and \c InputIterator's \c value_type is convertible to
- *                        \c OutputIterator's \c value_type.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>
- *                         and \c OutputIterator's \c value_type is convertible to
- *                         both \c AssociativeOperator's \c first_argument_type and
- *                         \c second_argument_type.
- *  \tparam T is convertible to \c OutputIterator's \c value_type.
- *  \tparam AssociativeOperator is a model of <a href="http://www.sgi.com/tech/stl/BinaryFunction.html">Binary Function</a>
- *                              and \c AssociativeOperator's \c result_type is
- *                              convertible to \c OutputIterator's \c value_type.
- *
- *  \pre \p first may equal \p result but the range <tt>[first, last)</tt> and the range <tt>[result, result + (last - first))</tt> shall not overlap otherwise.
- *
- *  The following code snippet demonstrates how to use \p exclusive_scan to compute an in-place
- *  prefix sum using the \p thrust::host execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/scan.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  
- *  int data[10] = {-5, 0, 2, -3, 2, 4, 0, -1, 2, 8};
- * 
- *  thrust::maximum<int> binary_op;
- *
- *  thrust::exclusive_scan(thrust::host, data, data + 10, data, 1, binary_op); // in-place scan
- *
- *  // data is now {1, 1, 1, 2, 2, 2, 4, 4, 4, 4 }
- *  \endcode
- *  
- *  \see http://www.sgi.com/tech/stl/partial_sum.html
- */
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename T,
-         typename AssociativeOperator>
-  OutputIterator exclusive_scan(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                InputIterator first,
-                                InputIterator last,
-                                OutputIterator result,
-                                T init,
-                                AssociativeOperator binary_op);
-
-
-/*! \p exclusive_scan computes an exclusive prefix sum operation. The
- *  term 'exclusive' means that each result does not include the 
- *  corresponding input operand in the partial sum.  More precisely,
- *  \p init is assigned to <tt>\*result</tt> and the value
- *  <tt>binary_op(init, \*first)</tt> is assigned to <tt>\*(result + 1)</tt>,
- *  and so on. This version of the function requires both an associative 
- *  operator and an initial value \p init.  When the input and output
- *  sequences are the same, the scan is performed in-place.
- *    
- *  \param first The beginning of the input sequence.
- *  \param last The end of the input sequence.
- *  \param result The beginning of the output sequence.
- *  \param init The initial value.
- *  \param binary_op The associative operator used to 'sum' values.
- *  \return The end of the output sequence.
- *
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *                        and \c InputIterator's \c value_type is convertible to
- *                        \c OutputIterator's \c value_type.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>
- *                         and \c OutputIterator's \c value_type is convertible to
- *                         both \c AssociativeOperator's \c first_argument_type and
- *                         \c second_argument_type.
- *  \tparam T is convertible to \c OutputIterator's \c value_type.
- *  \tparam AssociativeOperator is a model of <a href="http://www.sgi.com/tech/stl/BinaryFunction.html">Binary Function</a>
- *                              and \c AssociativeOperator's \c result_type is
- *                              convertible to \c OutputIterator's \c value_type.
- *
- *  \pre \p first may equal \p result but the range <tt>[first, last)</tt> and the range <tt>[result, result + (last - first))</tt> shall not overlap otherwise.
- *
- *  The following code snippet demonstrates how to use \p exclusive_scan
- *
- *  \code
- *  #include <thrust/scan.h>
- *  #include <thrust/functional.h>
- *  
- *  int data[10] = {-5, 0, 2, -3, 2, 4, 0, -1, 2, 8};
- * 
- *  thrust::maximum<int> binary_op;
- *
- *  thrust::exclusive_scan(data, data + 10, data, 1, binary_op); // in-place scan
- *
- *  // data is now {1, 1, 1, 2, 2, 2, 4, 4, 4, 4 }
- *  \endcode
- *  
- *  \see http://www.sgi.com/tech/stl/partial_sum.html
- */
-template<typename InputIterator,
-         typename OutputIterator,
-         typename T,
-         typename AssociativeOperator>
-  OutputIterator exclusive_scan(InputIterator first,
-                                InputIterator last,
-                                OutputIterator result,
-                                T init,
-                                AssociativeOperator binary_op);
-
-
-/*! \addtogroup segmentedprefixsums Segmented Prefix Sums
- *  \ingroup prefixsums
- *  \{
- */
-
-
-/*! \p inclusive_scan_by_key computes an inclusive key-value or 'segmented' prefix 
- *  sum operation. The term 'inclusive' means that each result includes 
- *  the corresponding input operand in the partial sum. The term 'segmented'
- *  means that the partial sums are broken into distinct segments.  In other
- *  words, within each segment a separate inclusive scan operation is computed.
- *  Refer to the code sample below for example usage.
- *
- *  This version of \p inclusive_scan_by_key assumes \c equal_to as the binary
- *  predicate used to compare adjacent keys.  Specifically, consecutive iterators
- *  <tt>i</tt> and <tt>i+1</tt> in the range <tt>[first1, last1)</tt>
- *  belong to the same segment if <tt>*i == *(i+1)</tt>, and belong to
- *  different segments otherwise.
- *
- *  This version of \p inclusive_scan_by_key assumes \c plus as the associative
- *  operator used to perform the prefix sum. When the input and output sequences
- *  are the same, the scan is performed in-place.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first1 The beginning of the key sequence.
- *  \param last1 The end of the key sequence.
- *  \param first2 The beginning of the input value sequence.
- *  \param result The beginning of the output value sequence.
- *  \return The end of the output sequence.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *                         and \c InputIterator2's \c value_type is convertible to \c OutputIterator's \c value_type.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>,
- *                         and if \c x and \c y are objects of \c OutputIterator's \c value_type, then 
- *                         <tt>binary_op(x,y)</tt> is defined.
- *
- *  \pre \p first1 may equal \p result but the range <tt>[first1, last1)</tt> and the range <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
- *  \pre \p first2 may equal \p result but the range <tt>[first2, first2 + (last1 - first1))</tt> and range <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
- *
- *  The following code snippet demonstrates how to use \p inclusive_scan_by_key using the \p thrust::host
- *  execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/scan.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  
- *  int data[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
- *  int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3};
- *
- *  thrust::inclusive_scan_by_key(thrust::host, keys, keys + 10, data, data); // in-place scan
- *
- *  // data is now {1, 2, 3, 1, 2, 1, 1, 2, 3, 4};
- *  \endcode
- *
- *  \see inclusive_scan
- *  \see exclusive_scan_by_key
- *
- */
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator>
-  OutputIterator inclusive_scan_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                       InputIterator1 first1,
-                                       InputIterator1 last1,
-                                       InputIterator2 first2,
-                                       OutputIterator result);
- 
-
-/*! \p inclusive_scan_by_key computes an inclusive key-value or 'segmented' prefix 
- *  sum operation. The term 'inclusive' means that each result includes 
- *  the corresponding input operand in the partial sum. The term 'segmented'
- *  means that the partial sums are broken into distinct segments.  In other
- *  words, within each segment a separate inclusive scan operation is computed.
- *  Refer to the code sample below for example usage.
- *
- *  This version of \p inclusive_scan_by_key assumes \c equal_to as the binary
- *  predicate used to compare adjacent keys.  Specifically, consecutive iterators
- *  <tt>i</tt> and <tt>i+1</tt> in the range <tt>[first1, last1)</tt>
- *  belong to the same segment if <tt>*i == *(i+1)</tt>, and belong to
- *  different segments otherwise.
- *
- *  This version of \p inclusive_scan_by_key assumes \c plus as the associative
- *  operator used to perform the prefix sum. When the input and output sequences
- *  are the same, the scan is performed in-place.
- *
- *  \param first1 The beginning of the key sequence.
- *  \param last1 The end of the key sequence.
- *  \param first2 The beginning of the input value sequence.
- *  \param result The beginning of the output value sequence.
- *  \return The end of the output sequence.
- *
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *                         and \c InputIterator2's \c value_type is convertible to \c OutputIterator's \c value_type.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>,
- *                         and if \c x and \c y are objects of \c OutputIterator's \c value_type, then 
- *                         <tt>binary_op(x,y)</tt> is defined.
- *
- *  \pre \p first1 may equal \p result but the range <tt>[first1, last1)</tt> and the range <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
- *  \pre \p first2 may equal \p result but the range <tt>[first2, first2 + (last1 - first1)</tt> and range <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
- *
- *  The following code snippet demonstrates how to use \p inclusive_scan_by_key
- *
- *  \code
- *  #include <thrust/scan.h>
- *  
- *  int data[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
- *  int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3};
- *
- *  thrust::inclusive_scan_by_key(keys, keys + 10, vals, vals); // in-place scan
- *
- *  // data is now {1, 2, 3, 1, 2, 1, 1, 2, 3, 4};
- *  \endcode
- *
- *  \see inclusive_scan
- *  \see exclusive_scan_by_key
- *
- */
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator>
-  OutputIterator inclusive_scan_by_key(InputIterator1 first1,
-                                       InputIterator1 last1,
-                                       InputIterator2 first2,
-                                       OutputIterator result);
-
-
-/*! \p inclusive_scan_by_key computes an inclusive key-value or 'segmented' prefix 
- *  sum operation. The term 'inclusive' means that each result includes 
- *  the corresponding input operand in the partial sum. The term 'segmented'
- *  means that the partial sums are broken into distinct segments.  In other
- *  words, within each segment a separate inclusive scan operation is computed.
- *  Refer to the code sample below for example usage.
- *
- *  This version of \p inclusive_scan_by_key uses the binary predicate 
- *  \c pred to compare adjacent keys.  Specifically, consecutive iterators
- *  <tt>i</tt> and <tt>i+1</tt> in the range <tt>[first1, last1)</tt>
- *  belong to the same segment if <tt>binary_pred(*i, *(i+1))</tt> is true, and belong to 
- *  different segments otherwise.
- *
- *  This version of \p inclusive_scan_by_key assumes \c plus as the associative
- *  operator used to perform the prefix sum. When the input and output sequences
- *  are the same, the scan is performed in-place.
- *
- *  The algorithm's execution is parallelized as determined by \p exec. 
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first1 The beginning of the key sequence.
- *  \param last1 The end of the key sequence.
- *  \param first2 The beginning of the input value sequence.
- *  \param result The beginning of the output value sequence.
- *  \param binary_pred  The binary predicate used to determine equality of keys.
- *  \return The end of the output sequence.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *                         and \c InputIterator2's \c value_type is convertible to \c OutputIterator's \c value_type.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>,
- *                         and if \c x and \c y are objects of \c OutputIterator's \c value_type, then 
- *                         <tt>binary_op(x,y)</tt> is defined.
- *  \tparam BinaryPredicate is a model of <a href="http://www.sgi.com/tech/stl/BinaryPredicate.html">Binary Predicate</a>.
- *
- *  \pre \p first1 may equal \p result but the range <tt>[first1, last1)</tt> and the range <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
- *  \pre \p first2 may equal \p result but the range <tt>[first2, first2 + (last1 - first1)</tt> and range <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
- *
- *  The following code snippet demonstrates how to use \p inclusive_scan_by_key using the \p thrust::host
- *  execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/scan.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  
- *  int data[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
- *  int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3};
- *
- *  thrust::equal_to<int> binary_pred;
- *
- *  thrust::inclusive_scan_by_key(thrust::host, keys, keys + 10, vals, vals, binary_pred); // in-place scan
- *
- *  // data is now {1, 2, 3, 1, 2, 1, 1, 2, 3, 4};
- *  \endcode
- *
- *  \see inclusive_scan
- *  \see exclusive_scan_by_key
- *
- */
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename BinaryPredicate>
-  OutputIterator inclusive_scan_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                       InputIterator1 first1,
-                                       InputIterator1 last1,
-                                       InputIterator2 first2,
-                                       OutputIterator result,
-                                       BinaryPredicate binary_pred);
-
-
-/*! \p inclusive_scan_by_key computes an inclusive key-value or 'segmented' prefix 
- *  sum operation. The term 'inclusive' means that each result includes 
- *  the corresponding input operand in the partial sum. The term 'segmented'
- *  means that the partial sums are broken into distinct segments.  In other
- *  words, within each segment a separate inclusive scan operation is computed.
- *  Refer to the code sample below for example usage.
- *
- *  This version of \p inclusive_scan_by_key uses the binary predicate 
- *  \c pred to compare adjacent keys.  Specifically, consecutive iterators
- *  <tt>i</tt> and <tt>i+1</tt> in the range <tt>[first1, last1)</tt>
- *  belong to the same segment if <tt>binary_pred(*i, *(i+1))</tt> is true, and belong to 
- *  different segments otherwise.
- *
- *  This version of \p inclusive_scan_by_key assumes \c plus as the associative
- *  operator used to perform the prefix sum. When the input and output sequences
- *  are the same, the scan is performed in-place.
- *
- *  \param first1 The beginning of the key sequence.
- *  \param last1 The end of the key sequence.
- *  \param first2 The beginning of the input value sequence.
- *  \param result The beginning of the output value sequence.
- *  \param binary_pred  The binary predicate used to determine equality of keys.
- *  \return The end of the output sequence.
- *
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *                         and \c InputIterator2's \c value_type is convertible to \c OutputIterator's \c value_type.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>,
- *                         and if \c x and \c y are objects of \c OutputIterator's \c value_type, then 
- *                         <tt>binary_op(x,y)</tt> is defined.
- *  \tparam BinaryPredicate is a model of <a href="http://www.sgi.com/tech/stl/BinaryPredicate.html">Binary Predicate</a>.
- *
- *  \pre \p first1 may equal \p result but the range <tt>[first1, last1)</tt> and the range <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
- *  \pre \p first2 may equal \p result but the range <tt>[first2, first2 + (last1 - first1)</tt> and range <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
- *
- *  The following code snippet demonstrates how to use \p inclusive_scan_by_key
- *
- *  \code
- *  #include <thrust/scan.h>
- *  #include <thrust/functional.h>
- *  
- *  int data[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
- *  int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3};
- *
- *  thrust::equal_to<int> binary_pred;
- *
- *  thrust::inclusive_scan_by_key(keys, keys + 10, vals, vals, binary_pred); // in-place scan
- *
- *  // data is now {1, 2, 3, 1, 2, 1, 1, 2, 3, 4};
- *  \endcode
- *
- *  \see inclusive_scan
- *  \see exclusive_scan_by_key
- *
- */
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename BinaryPredicate>
-  OutputIterator inclusive_scan_by_key(InputIterator1 first1,
-                                       InputIterator1 last1,
-                                       InputIterator2 first2,
-                                       OutputIterator result,
-                                       BinaryPredicate binary_pred);
-
-
-/*! \p inclusive_scan_by_key computes an inclusive key-value or 'segmented' prefix 
- *  sum operation. The term 'inclusive' means that each result includes 
- *  the corresponding input operand in the partial sum. The term 'segmented'
- *  means that the partial sums are broken into distinct segments.  In other
- *  words, within each segment a separate inclusive scan operation is computed.
- *  Refer to the code sample below for example usage.
- *
- *  This version of \p inclusive_scan_by_key uses the binary predicate 
- *  \c pred to compare adjacent keys.  Specifically, consecutive iterators
- *  <tt>i</tt> and <tt>i+1</tt> in the range <tt>[first1, last1)</tt>
- *  belong to the same segment if <tt>binary_pred(*i, *(i+1))</tt> is true, and belong to 
- *  different segments otherwise.
- *
- *  This version of \p inclusive_scan_by_key uses the associative operator 
- *  \c binary_op to perform the prefix sum. When the input and output sequences
- *  are the same, the scan is performed in-place.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first1 The beginning of the key sequence.
- *  \param last1 The end of the key sequence.
- *  \param first2 The beginning of the input value sequence.
- *  \param result The beginning of the output value sequence.
- *  \param binary_pred  The binary predicate used to determine equality of keys.
- *  \param binary_op The associatve operator used to 'sum' values.
- *  \return The end of the output sequence.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *                         and \c InputIterator2's \c value_type is convertible to \c OutputIterator's \c value_type.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>,
- *                         and if \c x and \c y are objects of \c OutputIterator's \c value_type, then 
- *                         <tt>binary_op(x,y)</tt> is defined.
- *  \tparam BinaryPredicate is a model of <a href="http://www.sgi.com/tech/stl/BinaryPredicate.html">Binary Predicate</a>.
- *  \tparam AssociativeOperator is a model of <a href="http://www.sgi.com/tech/stl/BinaryFunction.html">Binary Function</a>
- *                              and \c AssociativeOperator's \c result_type is
- *                              convertible to \c OutputIterator's \c value_type.
- *
- *  \pre \p first1 may equal \p result but the range <tt>[first1, last1)</tt> and the range <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
- *  \pre \p first2 may equal \p result but the range <tt>[first2, first2 + (last1 - first1)</tt> and range <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
- *
- *  The following code snippet demonstrates how to use \p inclusive_scan_by_key using the \p thrust::host
- *  execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/scan.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  
- *  int data[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
- *  int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3};
- *
- *  thrust::equal_to<int> binary_pred;
- *  thrust::plus<int>     binary_op;
- *
- *  thrust::inclusive_scan_by_key(thrust::host, keys, keys + 10, vals, vals, binary_pred, binary_op); // in-place scan
- *
- *  // data is now {1, 2, 3, 1, 2, 1, 1, 2, 3, 4};
- *  \endcode
- *
- *  \see inclusive_scan
- *  \see exclusive_scan_by_key
- *
- */
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename BinaryPredicate,
-         typename AssociativeOperator>
-  OutputIterator inclusive_scan_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                       InputIterator1 first1,
-                                       InputIterator1 last1,
-                                       InputIterator2 first2,
-                                       OutputIterator result,
-                                       BinaryPredicate binary_pred,
-                                       AssociativeOperator binary_op);
-
-
-/*! \p inclusive_scan_by_key computes an inclusive key-value or 'segmented' prefix 
- *  sum operation. The term 'inclusive' means that each result includes 
- *  the corresponding input operand in the partial sum. The term 'segmented'
- *  means that the partial sums are broken into distinct segments.  In other
- *  words, within each segment a separate inclusive scan operation is computed.
- *  Refer to the code sample below for example usage.
- *
- *  This version of \p inclusive_scan_by_key uses the binary predicate 
- *  \c pred to compare adjacent keys.  Specifically, consecutive iterators
- *  <tt>i</tt> and <tt>i+1</tt> in the range <tt>[first1, last1)</tt>
- *  belong to the same segment if <tt>binary_pred(*i, *(i+1))</tt> is true, and belong to 
- *  different segments otherwise.
- *
- *  This version of \p inclusive_scan_by_key uses the associative operator 
- *  \c binary_op to perform the prefix sum. When the input and output sequences
- *  are the same, the scan is performed in-place.
- *
- *  \param first1 The beginning of the key sequence.
- *  \param last1 The end of the key sequence.
- *  \param first2 The beginning of the input value sequence.
- *  \param result The beginning of the output value sequence.
- *  \param binary_pred  The binary predicate used to determine equality of keys.
- *  \param binary_op The associatve operator used to 'sum' values.
- *  \return The end of the output sequence.
- *
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *                         and \c InputIterator2's \c value_type is convertible to \c OutputIterator's \c value_type.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>,
- *                         and if \c x and \c y are objects of \c OutputIterator's \c value_type, then 
- *                         <tt>binary_op(x,y)</tt> is defined.
- *  \tparam BinaryPredicate is a model of <a href="http://www.sgi.com/tech/stl/BinaryPredicate.html">Binary Predicate</a>.
- *  \tparam AssociativeOperator is a model of <a href="http://www.sgi.com/tech/stl/BinaryFunction.html">Binary Function</a>
- *                              and \c AssociativeOperator's \c result_type is
- *                              convertible to \c OutputIterator's \c value_type.
- *
- *  \pre \p first1 may equal \p result but the range <tt>[first1, last1)</tt> and the range <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
- *  \pre \p first2 may equal \p result but the range <tt>[first2, first2 + (last1 - first1)</tt> and range <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
- *
- *  The following code snippet demonstrates how to use \p inclusive_scan_by_key
- *
- *  \code
- *  #include <thrust/scan.h>
- *  #include <thrust/functional.h>
- *  
- *  int data[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
- *  int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3};
- *
- *  thrust::equal_to<int> binary_pred;
- *  thrust::plus<int>     binary_op;
- *
- *  thrust::inclusive_scan_by_key(keys, keys + 10, vals, vals, binary_pred, binary_op); // in-place scan
- *
- *  // data is now {1, 2, 3, 1, 2, 1, 1, 2, 3, 4};
- *  \endcode
- *
- *  \see inclusive_scan
- *  \see exclusive_scan_by_key
- *
- */
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename BinaryPredicate,
-         typename AssociativeOperator>
-  OutputIterator inclusive_scan_by_key(InputIterator1 first1,
-                                       InputIterator1 last1,
-                                       InputIterator2 first2,
-                                       OutputIterator result,
-                                       BinaryPredicate binary_pred,
-                                       AssociativeOperator binary_op);
-
-
-/*! \p exclusive_scan_by_key computes an exclusive segmented prefix 
- *
- *  This version of \p exclusive_scan_by_key uses the value \c 0 to
- *  initialize the exclusive scan operation.
- *
- *  This version of \p exclusive_scan_by_key assumes \c plus as the associative
- *  operator used to perform the prefix sum. When the input and output sequences
- *  are the same, the scan is performed in-place.
- * 
- *  This version of \p exclusive_scan_by_key assumes \c equal_to as the binary
- *  predicate used to compare adjacent keys.  Specifically, consecutive iterators
- *  <tt>i</tt> and <tt>i+1</tt> in the range <tt>[first1, last1</tt>
- *  belong to the same segment if <tt>*i == *(i+1)</tt>, and belong to 
- *  different segments otherwise.
- *
- *  Refer to the most general form of \p exclusive_scan_by_key for additional details.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first1 The beginning of the key sequence.
- *  \param last1 The end of the key sequence.
- *  \param first2 The beginning of the input value sequence.
- *  \param result The beginning of the output value sequence.
- *
- *  \pre \p first1 may equal \p result but the range <tt>[first1, last1)</tt> and the range <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
- *  \pre \p first2 may equal \p result but the range <tt>[first2, first2 + (last1 - first1)</tt> and range <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
- *
- *  The following code snippet demonstrates how to use \p exclusive_scan_by_key using the
- *  \p thrust::host execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/scan.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  
- *  int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3};
- *  int vals[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
- *
- *  thrust::exclusive_scan_by_key(thrust::host, key, key + 10, vals, vals); // in-place scan
- *
- *  // vals is now {0, 1, 2, 0, 1, 0, 0, 1, 2, 3};
- *  \endcode
- *
- *  \see exclusive_scan
- *
- */
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator>
-  OutputIterator exclusive_scan_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                       InputIterator1 first1,
-                                       InputIterator1 last1,
-                                       InputIterator2 first2,
-                                       OutputIterator result);
-
-
-/*! \p exclusive_scan_by_key computes an exclusive segmented prefix 
- *
- *  This version of \p exclusive_scan_by_key uses the value \c 0 to
- *  initialize the exclusive scan operation.
- *
- *  This version of \p exclusive_scan_by_key assumes \c plus as the associative
- *  operator used to perform the prefix sum. When the input and output sequences
- *  are the same, the scan is performed in-place.
- * 
- *  This version of \p exclusive_scan_by_key assumes \c equal_to as the binary
- *  predicate used to compare adjacent keys.  Specifically, consecutive iterators
- *  <tt>i</tt> and <tt>i+1</tt> in the range <tt>[first1, last1</tt>
- *  belong to the same segment if <tt>*i == *(i+1)</tt>, and belong to 
- *  different segments otherwise.
- *
- *  Refer to the most general form of \p exclusive_scan_by_key for additional details.
- *
- *  \param first1 The beginning of the key sequence.
- *  \param last1 The end of the key sequence.
- *  \param first2 The beginning of the input value sequence.
- *  \param result The beginning of the output value sequence.
- *
- *  \pre \p first1 may equal \p result but the range <tt>[first1, last1)</tt> and the range <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
- *  \pre \p first2 may equal \p result but the range <tt>[first2, first2 + (last1 - first1)</tt> and range <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
- *
- *  The following code snippet demonstrates how to use \p exclusive_scan_by_key.
- *
- *  \code
- *  #include <thrust/scan.h>
- *  
- *  int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3};
- *  int vals[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
- *
- *  thrust::exclusive_scan_by_key(key, key + 10, vals, vals); // in-place scan
- *
- *  // vals is now {0, 1, 2, 0, 1, 0, 0, 1, 2, 3};
- *  \endcode
- *
- *  \see exclusive_scan
- *
- */
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator>
-  OutputIterator exclusive_scan_by_key(InputIterator1 first1,
-                                       InputIterator1 last1,
-                                       InputIterator2 first2,
-                                       OutputIterator result);
-
-
-/*! \p exclusive_scan_by_key computes an exclusive key-value or 'segmented' prefix 
- *  sum operation. The term 'exclusive' means that each result does not include
- *  the corresponding input operand in the partial sum. The term 'segmented'
- *  means that the partial sums are broken into distinct segments.  In other
- *  words, within each segment a separate exclusive scan operation is computed.
- *  Refer to the code sample below for example usage.
- *
- *  This version of \p exclusive_scan_by_key uses the value \c init to
- *  initialize the exclusive scan operation.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first1 The beginning of the key sequence.
- *  \param last1 The end of the key sequence.
- *  \param first2 The beginning of the input value sequence.
- *  \param result The beginning of the output value sequence.
- *  \param init The initial of the exclusive sum value.
- *  \return The end of the output sequence.
- *
- *  \pre \p first1 may equal \p result but the range <tt>[first1, last1)</tt> and the range <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
- *  \pre \p first2 may equal \p result but the range <tt>[first2, first2 + (last1 - first1)</tt> and range <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
- *
- *  The following code snippet demonstrates how to use \p exclusive_scan_by_key using the \p
- *  thrust::host execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/scan.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  
- *  int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3};
- *  int vals[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
- *
- *  int init = 5;
- *
- *  thrust::exclusive_scan_by_key(thrust::host, key, key + 10, vals, vals, init); // in-place scan
- *
- *  // vals is now {5, 6, 7, 5, 6, 5, 5, 6, 7, 8};
- *  \endcode
- *
- *  \see exclusive_scan
- *  \see inclusive_scan_by_key
- *
- */
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename T>
-  OutputIterator exclusive_scan_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                       InputIterator1 first1,
-                                       InputIterator1 last1,
-                                       InputIterator2 first2,
-                                       OutputIterator result,
-                                       T init);
-
-
-/*! \p exclusive_scan_by_key computes an exclusive key-value or 'segmented' prefix 
- *  sum operation. The term 'exclusive' means that each result does not include
- *  the corresponding input operand in the partial sum. The term 'segmented'
- *  means that the partial sums are broken into distinct segments.  In other
- *  words, within each segment a separate exclusive scan operation is computed.
- *  Refer to the code sample below for example usage.
- *
- *  This version of \p exclusive_scan_by_key uses the value \c init to
- *  initialize the exclusive scan operation.
- *
- *  \param first1 The beginning of the key sequence.
- *  \param last1 The end of the key sequence.
- *  \param first2 The beginning of the input value sequence.
- *  \param result The beginning of the output value sequence.
- *  \param init The initial of the exclusive sum value.
- *  \return The end of the output sequence.
- *
- *  \pre \p first1 may equal \p result but the range <tt>[first1, last1)</tt> and the range <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
- *  \pre \p first2 may equal \p result but the range <tt>[first2, first2 + (last1 - first1)</tt> and range <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
- *
- *  The following code snippet demonstrates how to use \p exclusive_scan_by_key
- *
- *  \code
- *  #include <thrust/scan.h>
- *  #include <thrust/functional.h>
- *  
- *  int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3};
- *  int vals[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
- *
- *  int init = 5;
- *
- *  thrust::exclusive_scan_by_key(key, key + 10, vals, vals, init); // in-place scan
- *
- *  // vals is now {5, 6, 7, 5, 6, 5, 5, 6, 7, 8};
- *  \endcode
- *
- *  \see exclusive_scan
- *  \see inclusive_scan_by_key
- *
- */
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename T>
-  OutputIterator exclusive_scan_by_key(InputIterator1 first1,
-                                       InputIterator1 last1,
-                                       InputIterator2 first2,
-                                       OutputIterator result,
-                                       T init);
-
-
-/*! \p exclusive_scan_by_key computes an exclusive key-value or 'segmented' prefix 
- *  sum operation. The term 'exclusive' means that each result does not include
- *  the corresponding input operand in the partial sum. The term 'segmented'
- *  means that the partial sums are broken into distinct segments.  In other
- *  words, within each segment a separate exclusive scan operation is computed.
- *  Refer to the code sample below for example usage.
- *
- *  This version of \p exclusive_scan_by_key uses the value \c init to
- *  initialize the exclusive scan operation.
- *
- *  This version of \p exclusive_scan_by_key uses the binary predicate \c binary_pred
- *  to compare adjacent keys.  Specifically, consecutive iterators <tt>i</tt> and
- *  <tt>i+1</tt> in the range <tt>[first1, last1)</tt> belong to the same segment if
- *  <tt>binary_pred(*i, *(i+1))</tt> is true, and belong to different segments otherwise.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first1 The beginning of the key sequence.
- *  \param last1 The end of the key sequence.
- *  \param first2 The beginning of the input value sequence.
- *  \param result The beginning of the output value sequence.
- *  \param init The initial of the exclusive sum value.
- *  \param binary_pred The binary predicate used to determine equality of keys.
- *  \return The end of the output sequence.
- *
- *  \pre \p first1 may equal \p result but the range <tt>[first1, last1)</tt> and the range <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
- *  \pre \p first2 may equal \p result but the range <tt>[first2, first2 + (last1 - first1)</tt> and range <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
- *
- *  The following code snippet demonstrates how to use \p exclusive_scan_by_key using the
- *  \p thrust::host execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/scan.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  
- *  int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3};
- *  int vals[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
- *
- *  int init = 5;
- *
- *  thrust::equal_to<int> binary_pred;
- *
- *  thrust::exclusive_scan_by_key(thrust::host, key, key + 10, vals, vals, init, binary_pred); // in-place scan
- *
- *  // vals is now {5, 6, 7, 5, 6, 5, 5, 6, 7, 8};
- *  \endcode
- *
- *  \see exclusive_scan
- *  \see inclusive_scan_by_key
- *
- */
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename T,
-         typename BinaryPredicate>
-  OutputIterator exclusive_scan_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                       InputIterator1 first1,
-                                       InputIterator1 last1,
-                                       InputIterator2 first2,
-                                       OutputIterator result,
-                                       T init,
-                                       BinaryPredicate binary_pred);
-
-
-/*! \p exclusive_scan_by_key computes an exclusive key-value or 'segmented' prefix 
- *  sum operation. The term 'exclusive' means that each result does not include
- *  the corresponding input operand in the partial sum. The term 'segmented'
- *  means that the partial sums are broken into distinct segments.  In other
- *  words, within each segment a separate exclusive scan operation is computed.
- *  Refer to the code sample below for example usage.
- *
- *  This version of \p exclusive_scan_by_key uses the value \c init to
- *  initialize the exclusive scan operation.
- *
- *  This version of \p exclusive_scan_by_key uses the binary predicate \c binary_pred
- *  to compare adjacent keys.  Specifically, consecutive iterators <tt>i</tt> and
- *  <tt>i+1</tt> in the range <tt>[first1, last1)</tt> belong to the same segment if
- *  <tt>binary_pred(*i, *(i+1))</tt> is true, and belong to different segments otherwise.
- *
- *  \param first1 The beginning of the key sequence.
- *  \param last1 The end of the key sequence.
- *  \param first2 The beginning of the input value sequence.
- *  \param result The beginning of the output value sequence.
- *  \param init The initial of the exclusive sum value.
- *  \param binary_pred The binary predicate used to determine equality of keys.
- *  \return The end of the output sequence.
- *
- *  \pre \p first1 may equal \p result but the range <tt>[first1, last1)</tt> and the range <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
- *  \pre \p first2 may equal \p result but the range <tt>[first2, first2 + (last1 - first1)</tt> and range <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
- *
- *  The following code snippet demonstrates how to use \p exclusive_scan_by_key
- *
- *  \code
- *  #include <thrust/scan.h>
- *  #include <thrust/functional.h>
- *  
- *  int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3};
- *  int vals[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
- *
- *  int init = 5;
- *
- *  thrust::equal_to<int> binary_pred;
- *
- *  thrust::exclusive_scan_by_key(key, key + 10, vals, vals, init, binary_pred); // in-place scan
- *
- *  // vals is now {5, 6, 7, 5, 6, 5, 5, 6, 7, 8};
- *  \endcode
- *
- *  \see exclusive_scan
- *  \see inclusive_scan_by_key
- *
- */
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename T,
-         typename BinaryPredicate>
-  OutputIterator exclusive_scan_by_key(InputIterator1 first1,
-                                       InputIterator1 last1,
-                                       InputIterator2 first2,
-                                       OutputIterator result,
-                                       T init,
-                                       BinaryPredicate binary_pred);
-
-
-/*! \p exclusive_scan_by_key computes an exclusive key-value or 'segmented' prefix 
- *  sum operation. The term 'exclusive' means that each result does not include
- *  the corresponding input operand in the partial sum. The term 'segmented'
- *  means that the partial sums are broken into distinct segments.  In other
- *  words, within each segment a separate exclusive scan operation is computed.
- *  Refer to the code sample below for example usage.
- *
- *  This version of \p exclusive_scan_by_key uses the value \c init to
- *  initialize the exclusive scan operation.
- *
- *  This version of \p exclusive_scan_by_key uses the binary predicate \c binary_pred
- *  to compare adjacent keys.  Specifically, consecutive iterators <tt>i</tt> and
- *  <tt>i+1</tt> in the range <tt>[first1, last1)</tt> belong to the same segment if 
- *  <tt>binary_pred(*i, *(i+1))</tt> is true, and belong to different segments otherwise.
- *
- *  This version of \p exclusive_scan_by_key uses the associative operator 
- *  \c binary_op to perform the prefix sum. When the input and output sequences
- *  are the same, the scan is performed in-place.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first1 The beginning of the key sequence.
- *  \param last1 The end of the key sequence.
- *  \param first2 The beginning of the input value sequence.
- *  \param result The beginning of the output value sequence.
- *  \param init The initial of the exclusive sum value.
- *  \param binary_pred The binary predicate used to determine equality of keys.
- *  \param binary_op The associatve operator used to 'sum' values.
- *  \return The end of the output sequence.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *                         and \c InputIterator2's \c value_type is convertible to \c OutputIterator's \c value_type.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>,
- *                         and if \c x and \c y are objects of \c OutputIterator's \c value_type, then 
- *                         <tt>binary_op(x,y)</tt> is defined.
- *  \tparam T is convertible to \c OutputIterator's \c value_type.
- *  \tparam BinaryPredicate is a model of <a href="http://www.sgi.com/tech/stl/BinaryPredicate.html">Binary Predicate</a>.
- *  \tparam AssociativeOperator is a model of <a href="http://www.sgi.com/tech/stl/BinaryFunction.html">Binary Function</a>
- *                         and \c AssociativeOperator's \c result_type is convertible to \c OutputIterator's \c value_type.
- *
- *  \pre \p first1 may equal \p result but the range <tt>[first1, last1)</tt> and the range <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
- *  \pre \p first2 may equal \p result but the range <tt>[first2, first2 + (last1 - first1)</tt> and range <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
- *
- *  The following code snippet demonstrates how to use \p exclusive_scan_by_key using the
- *  \p thrust::host execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/scan.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  
- *  int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3};
- *  int vals[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
- *
- *  int init = 5;
- *
- *  thrust::equal_to<int> binary_pred;
- *  thrust::plus<int>     binary_op;
- *
- *  thrust::exclusive_scan_by_key(thrust::host, key, key + 10, vals, vals, init, binary_pred, binary_op); // in-place scan
- *
- *  // vals is now {5, 6, 7, 5, 6, 5, 5, 6, 7, 8};
- *  \endcode
- *
- *  \see exclusive_scan
- *  \see inclusive_scan_by_key
- *
- */
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename T,
-         typename BinaryPredicate,
-         typename AssociativeOperator>
-  OutputIterator exclusive_scan_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                       InputIterator1 first1,
-                                       InputIterator1 last1,
-                                       InputIterator2 first2,
-                                       OutputIterator result,
-                                       T init,
-                                       BinaryPredicate binary_pred,
-                                       AssociativeOperator binary_op);
-
-
-/*! \p exclusive_scan_by_key computes an exclusive key-value or 'segmented' prefix 
- *  sum operation. The term 'exclusive' means that each result does not include
- *  the corresponding input operand in the partial sum. The term 'segmented'
- *  means that the partial sums are broken into distinct segments.  In other
- *  words, within each segment a separate exclusive scan operation is computed.
- *  Refer to the code sample below for example usage.
- *
- *  This version of \p exclusive_scan_by_key uses the value \c init to
- *  initialize the exclusive scan operation.
- *
- *  This version of \p exclusive_scan_by_key uses the binary predicate \c binary_pred
- *  to compare adjacent keys.  Specifically, consecutive iterators <tt>i</tt> and
- *  <tt>i+1</tt> in the range <tt>[first1, last1)</tt> belong to the same segment if 
- *  <tt>binary_pred(*i, *(i+1))</tt> is true, and belong to different segments otherwise.
- *
- *  This version of \p exclusive_scan_by_key uses the associative operator 
- *  \c binary_op to perform the prefix sum. When the input and output sequences
- *  are the same, the scan is performed in-place.
- *
- *  \param first1 The beginning of the key sequence.
- *  \param last1 The end of the key sequence.
- *  \param first2 The beginning of the input value sequence.
- *  \param result The beginning of the output value sequence.
- *  \param init The initial of the exclusive sum value.
- *  \param binary_pred The binary predicate used to determine equality of keys.
- *  \param binary_op The associatve operator used to 'sum' values.
- *  \return The end of the output sequence.
- *
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *                         and \c InputIterator2's \c value_type is convertible to \c OutputIterator's \c value_type.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>,
- *                         and if \c x and \c y are objects of \c OutputIterator's \c value_type, then 
- *                         <tt>binary_op(x,y)</tt> is defined.
- *  \tparam T is convertible to \c OutputIterator's \c value_type.
- *  \tparam BinaryPredicate is a model of <a href="http://www.sgi.com/tech/stl/BinaryPredicate.html">Binary Predicate</a>.
- *  \tparam AssociativeOperator is a model of <a href="http://www.sgi.com/tech/stl/BinaryFunction.html">Binary Function</a>
- *                         and \c AssociativeOperator's \c result_type is convertible to \c OutputIterator's \c value_type.
- *
- *  \pre \p first1 may equal \p result but the range <tt>[first1, last1)</tt> and the range <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
- *  \pre \p first2 may equal \p result but the range <tt>[first2, first2 + (last1 - first1)</tt> and range <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
- *
- *  The following code snippet demonstrates how to use \p exclusive_scan_by_key
- *
- *  \code
- *  #include <thrust/scan.h>
- *  #include <thrust/functional.h>
- *  
- *  int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3};
- *  int vals[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
- *
- *  int init = 5;
- *
- *  thrust::equal_to<int> binary_pred;
- *  thrust::plus<int>     binary_op;
- *
- *  thrust::exclusive_scan_by_key(key, key + 10, vals, vals, init, binary_pred, binary_op); // in-place scan
- *
- *  // vals is now {5, 6, 7, 5, 6, 5, 5, 6, 7, 8};
- *  \endcode
- *
- *  \see exclusive_scan
- *  \see inclusive_scan_by_key
- *
- */
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename T,
-         typename BinaryPredicate,
-         typename AssociativeOperator>
-  OutputIterator exclusive_scan_by_key(InputIterator1 first1,
-                                       InputIterator1 last1,
-                                       InputIterator2 first2,
-                                       OutputIterator result,
-                                       T init,
-                                       BinaryPredicate binary_pred,
-                                       AssociativeOperator binary_op);
-
-
-/*! \} // end segmentedprefixsums
- */
-
-
-/*! \} // end prefix sums
- */
-
-	
-} // end namespace thrust
-
-#include <thrust/detail/scan.inl>
-
diff --git a/compat/thrust/scatter.h b/compat/thrust/scatter.h
deleted file mode 100644
index 59604ca170..0000000000
--- a/compat/thrust/scatter.h
+++ /dev/null
@@ -1,420 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file scatter.h
- *  \brief Irregular copying to a destination range
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/execution_policy.h>
-
-namespace thrust
-{
-
-
-/*! \addtogroup scattering
- *  \ingroup copying
- *  \{
- */
-
-
-/*! \p scatter copies elements from a source range into an output array
- *  according to a map. For each iterator \c i in the range [\p first, \p last),
- *  the value \c *i is assigned to <tt>output[*(map + (i - first))]</tt>. The 
- *  output iterator must permit random access. If the same index 
- *  appears more than once in the range <tt>[map, map + (last - first))</tt>,
- *  the result is undefined.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first Beginning of the sequence of values to scatter.
- *  \param last End of the sequence of values to scatter.
- *  \param map  Beginning of the sequence of output indices.
- *  \param result Destination of the source elements.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator1 must be a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a> and \c InputIterator1's \c value_type must be convertible to \c RandomAccessIterator's \c value_type.
- *  \tparam InputIterator2 must be a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a> and \c InputIterator2's \c value_type must be convertible to \c RandomAccessIterator's \c difference_type.
- *  \tparam RandomAccessIterator must be a model of <a href="http://www.sgi.com/tech/stl/RandomAccessIterator.html">Random Access iterator</a>.
- *
- *  \pre The iterator `result + i` shall not refer to any element referenced by any iterator `j` in the range `[first,last)` for all iterators `i` in the range `[map,map + (last - first))`.
- *
- *  \pre The iterator `result + i` shall not refer to any element referenced by any iterator `j` in the range `[map,map + (last - first))` for all iterators `i` in the range `[map,map + (last - first))`.
- *
- *  \pre The expression `result[*i]` shall be valid for all iterators in the range `[map,map + (last - first))`.
- *
- *  The following code snippet demonstrates how to use \p scatter to
- *  reorder a range using the \p thrust::device execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/scatter.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  // mark even indices with a 1; odd indices with a 0
- *  int values[10] = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0};
- *  thrust::device_vector<int> d_values(values, values + 10);
- *
- *  // scatter all even indices into the first half of the
- *  // range, and odd indices vice versa
- *  int map[10]   = {0, 5, 1, 6, 2, 7, 3, 8, 4, 9};
- *  thrust::device_vector<int> d_map(map, map + 10);
- *
- *  thrust::device_vector<int> d_output(10);
- *  thrust::scatter(thrust::device,
- *                  d_values.begin(), d_values.end(),
- *                  d_map.begin(), d_output.begin());
- *  // d_output is now {1, 1, 1, 1, 1, 0, 0, 0, 0, 0}
- *  \endcode
- *
- *  \note \p scatter is the inverse of thrust::gather.
- */
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename RandomAccessIterator>
-  void scatter(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-               InputIterator1 first,
-               InputIterator1 last,
-               InputIterator2 map,
-               RandomAccessIterator result);
-
-
-/*! \p scatter copies elements from a source range into an output array
- *  according to a map. For each iterator \c i in the range [\p first, \p last),
- *  the value \c *i is assigned to <tt>output[*(map + (i - first))]</tt>. The 
- *  output iterator must permit random access. If the same index 
- *  appears more than once in the range <tt>[map, map + (last - first))</tt>,
- *  the result is undefined.
- *
- *  \param first Beginning of the sequence of values to scatter.
- *  \param last End of the sequence of values to scatter.
- *  \param map  Beginning of the sequence of output indices.
- *  \param result Destination of the source elements.
- *
- *  \tparam InputIterator1 must be a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a> and \c InputIterator1's \c value_type must be convertible to \c RandomAccessIterator's \c value_type.
- *  \tparam InputIterator2 must be a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a> and \c InputIterator2's \c value_type must be convertible to \c RandomAccessIterator's \c difference_type.
- *  \tparam RandomAccessIterator must be a model of <a href="http://www.sgi.com/tech/stl/RandomAccessIterator.html">Random Access iterator</a>.
- *
- *  \pre The iterator `result + i` shall not refer to any element referenced by any iterator `j` in the range `[first,last)` for all iterators `i` in the range `[map,map + (last - first))`.
- *
- *  \pre The iterator `result + i` shall not refer to any element referenced by any iterator `j` in the range `[map,map + (last - first))` for all iterators `i` in the range `[map,map + (last - first))`.
- *
- *  \pre The expression `result[*i]` shall be valid for all iterators in the range `[map,map + (last - first))`.
- *
- *  The following code snippet demonstrates how to use \p scatter to
- *  reorder a range.
- *
- *  \code
- *  #include <thrust/scatter.h>
- *  #include <thrust/device_vector.h>
- *  ...
- *  // mark even indices with a 1; odd indices with a 0
- *  int values[10] = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0};
- *  thrust::device_vector<int> d_values(values, values + 10);
- *
- *  // scatter all even indices into the first half of the
- *  // range, and odd indices vice versa
- *  int map[10]   = {0, 5, 1, 6, 2, 7, 3, 8, 4, 9};
- *  thrust::device_vector<int> d_map(map, map + 10);
- *
- *  thrust::device_vector<int> d_output(10);
- *  thrust::scatter(d_values.begin(), d_values.end(),
- *                  d_map.begin(), d_output.begin());
- *  // d_output is now {1, 1, 1, 1, 1, 0, 0, 0, 0, 0}
- *  \endcode
- *
- *  \note \p scatter is the inverse of thrust::gather.
- */
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename RandomAccessIterator>
-  void scatter(InputIterator1 first,
-               InputIterator1 last,
-               InputIterator2 map,
-               RandomAccessIterator result);
-
-
-/*! \p scatter_if conditionally copies elements from a source range into an 
- *  output array according to a map. For each iterator \c i in the 
- *  range <tt>[first, last)</tt> such that <tt>*(stencil + (i - first))</tt> is
- *  true, the value \c *i is assigned to <tt>output[*(map + (i - first))]</tt>.
- *  The output iterator must permit random access. If the same index 
- *  appears more than once in the range <tt>[map, map + (last - first))</tt>
- *  the result is undefined.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first Beginning of the sequence of values to scatter.
- *  \param last End of the sequence of values to scatter.
- *  \param map Beginning of the sequence of output indices.
- *  \param stencil Beginning of the sequence of predicate values.
- *  \param output Beginning of the destination range.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator1 must be a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a> and \c InputIterator1's \c value_type must be convertible to \c RandomAccessIterator's \c value_type.
- *  \tparam InputIterator2 must be a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a> and \c InputIterator2's \c value_type must be convertible to \c RandomAccessIterator's \c difference_type.
- *  \tparam InputIterator3 must be a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a> and \c InputIterator3's \c value_type must be convertible to \c bool.
- *  \tparam RandomAccessIterator must be a model of <a href="http://www.sgi.com/tech/stl/RandomAccessIterator.html">Random Access iterator</a>.
- *
- *  \pre The iterator `result + i` shall not refer to any element referenced by any iterator `j` in the range `[first,last)` for all iterators `i` in the range `[map,map + (last - first))`.
- *
- *  \pre The iterator `result + i` shall not refer to any element referenced by any iterator `j` in the range `[map,map + (last - first))` for all iterators `i` in the range `[map,map + (last - first))`.
- *
- *  \pre The iterator `result + i` shall not refer to any element referenced by any iterator `j` in the range `[stencil,stencil + (last - first))` for all iterators `i` in the range `[map,map + (last - first))`.
- *
- *  \pre The expression `result[*i]` shall be valid for all iterators `i` in the range `[map,map + (last - first))` for which the following condition holds: `*(stencil + i) != false`.
- *
- *  \code
- *  #include <thrust/scatter.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  int V[8] = {10, 20, 30, 40, 50, 60, 70, 80};
- *  int M[8] = {0, 5, 1, 6, 2, 7, 3, 4};
- *  int S[8] = {1, 0, 1, 0, 1, 0, 1, 0};
- *  int D[8] = {0, 0, 0, 0, 0, 0, 0, 0};
- * 
- *  thrust::scatter_if(thrust::host, V, V + 8, M, S, D);
- * 
- *  // D contains [10, 30, 50, 70, 0, 0, 0, 0];
- *  \endcode
- *
- *  \note \p scatter_if is the inverse of thrust::gather_if.
- */
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename RandomAccessIterator>
-  void scatter_if(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                  InputIterator1 first,
-                  InputIterator1 last,
-                  InputIterator2 map,
-                  InputIterator3 stencil,
-                  RandomAccessIterator output);
-
-
-/*! \p scatter_if conditionally copies elements from a source range into an 
- *  output array according to a map. For each iterator \c i in the 
- *  range <tt>[first, last)</tt> such that <tt>*(stencil + (i - first))</tt> is
- *  true, the value \c *i is assigned to <tt>output[*(map + (i - first))]</tt>.
- *  The output iterator must permit random access. If the same index 
- *  appears more than once in the range <tt>[map, map + (last - first))</tt>
- *  the result is undefined.
- *
- *  \param first Beginning of the sequence of values to scatter.
- *  \param last End of the sequence of values to scatter.
- *  \param map Beginning of the sequence of output indices.
- *  \param stencil Beginning of the sequence of predicate values.
- *  \param output Beginning of the destination range.
- *
- *  \tparam InputIterator1 must be a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a> and \c InputIterator1's \c value_type must be convertible to \c RandomAccessIterator's \c value_type.
- *  \tparam InputIterator2 must be a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a> and \c InputIterator2's \c value_type must be convertible to \c RandomAccessIterator's \c difference_type.
- *  \tparam InputIterator3 must be a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a> and \c InputIterator3's \c value_type must be convertible to \c bool.
- *  \tparam RandomAccessIterator must be a model of <a href="http://www.sgi.com/tech/stl/RandomAccessIterator.html">Random Access iterator</a>.
- *
- *  \pre The iterator `result + i` shall not refer to any element referenced by any iterator `j` in the range `[first,last)` for all iterators `i` in the range `[map,map + (last - first))`.
- *
- *  \pre The iterator `result + i` shall not refer to any element referenced by any iterator `j` in the range `[map,map + (last - first))` for all iterators `i` in the range `[map,map + (last - first))`.
- *
- *  \pre The iterator `result + i` shall not refer to any element referenced by any iterator `j` in the range `[stencil,stencil + (last - first))` for all iterators `i` in the range `[map,map + (last - first))`.
- *
- *  \pre The expression `result[*i]` shall be valid for all iterators `i` in the range `[map,map + (last - first))` for which the following condition holds: `*(stencil + i) != false`.
- *
- *  \code
- *  #include <thrust/scatter.h>
- *  ...
- *  int V[8] = {10, 20, 30, 40, 50, 60, 70, 80};
- *  int M[8] = {0, 5, 1, 6, 2, 7, 3, 4};
- *  int S[8] = {1, 0, 1, 0, 1, 0, 1, 0};
- *  int D[8] = {0, 0, 0, 0, 0, 0, 0, 0};
- * 
- *  thrust::scatter_if(V, V + 8, M, S, D);
- * 
- *  // D contains [10, 30, 50, 70, 0, 0, 0, 0];
- *  \endcode
- *
- *  \note \p scatter_if is the inverse of thrust::gather_if.
- */
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename RandomAccessIterator>
-  void scatter_if(InputIterator1 first,
-                  InputIterator1 last,
-                  InputIterator2 map,
-                  InputIterator3 stencil,
-                  RandomAccessIterator output);
-
-
-/*! \p scatter_if conditionally copies elements from a source range into an 
- *  output array according to a map. For each iterator \c i in the 
- *  range <tt>[first, last)</tt> such that <tt>pred(*(stencil + (i - first)))</tt> is
- *  \c true, the value \c *i is assigned to <tt>output[*(map + (i - first))]</tt>.
- *  The output iterator must permit random access. If the same index 
- *  appears more than once in the range <tt>[map, map + (last - first))</tt>
- *  the result is undefined.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first Beginning of the sequence of values to scatter.
- *  \param last End of the sequence of values to scatter.
- *  \param map Beginning of the sequence of output indices.
- *  \param stencil Beginning of the sequence of predicate values.
- *  \param output Beginning of the destination range.
- *  \param pred Predicate to apply to the stencil values.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator1 must be a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a> and \c InputIterator1's \c value_type must be convertible to \c RandomAccessIterator's \c value_type.
- *  \tparam InputIterator2 must be a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a> and \c InputIterator2's \c value_type must be convertible to \c RandomAccessIterator's \c difference_type.
- *  \tparam InputIterator3 must be a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a> and \c InputIterator3's \c value_type must be convertible to \c Predicate's \c argument_type.
- *  \tparam RandomAccessIterator must be a model of <a href="http://www.sgi.com/tech/stl/RandomAccessIterator.html">Random Access iterator</a>.
- *  \tparam Predicate must be a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *
- *  \pre The iterator `result + i` shall not refer to any element referenced by any iterator `j` in the range `[first,last)` for all iterators `i` in the range `[map,map + (last - first))`.
- *
- *  \pre The iterator `result + i` shall not refer to any element referenced by any iterator `j` in the range `[map,map + (last - first))` for all iterators `i` in the range `[map,map + (last - first))`.
- *
- *  \pre The iterator `result + i` shall not refer to any element referenced by any iterator `j` in the range `[stencil,stencil + (last - first))` for all iterators `i` in the range `[map,map + (last - first))`.
- *
- *  \pre The expression `result[*i]` shall be valid for all iterators `i` in the range `[map,map + (last - first))` for which the following condition holds: `pred(*(stencil + i)) != false`.
- *
- *  \code
- *  #include <thrust/scatter.h>
- *  #include <thrust/execution_policy.h>
- *
- *  struct is_even
- *  {
- *    __host__ __device__
- *    bool operator()(int x)
- *    {
- *      return (x % 2) == 0;
- *    }
- *  };
- *
- *  ...
- *
- *  int V[8] = {10, 20, 30, 40, 50, 60, 70, 80};
- *  int M[8] = {0, 5, 1, 6, 2, 7, 3, 4};
- *  int S[8] = {2, 1, 2, 1, 2, 1, 2, 1};
- *  int D[8] = {0, 0, 0, 0, 0, 0, 0, 0};
- * 
- *  is_even pred;
- *  thrust::scatter_if(thrust::host, V, V + 8, M, S, D, pred);
- * 
- *  // D contains [10, 30, 50, 70, 0, 0, 0, 0];
- *  \endcode
- *  
- *  \note \p scatter_if is the inverse of thrust::gather_if.
- */
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename RandomAccessIterator,
-         typename Predicate>
-  void scatter_if(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                  InputIterator1 first,
-                  InputIterator1 last,
-                  InputIterator2 map,
-                  InputIterator3 stencil,
-                  RandomAccessIterator output,
-                  Predicate pred);
-                  
-
-/*! \p scatter_if conditionally copies elements from a source range into an 
- *  output array according to a map. For each iterator \c i in the 
- *  range <tt>[first, last)</tt> such that <tt>pred(*(stencil + (i - first)))</tt> is
- *  \c true, the value \c *i is assigned to <tt>output[*(map + (i - first))]</tt>.
- *  The output iterator must permit random access. If the same index 
- *  appears more than once in the range <tt>[map, map + (last - first))</tt>
- *  the result is undefined.
- *
- *  \param first Beginning of the sequence of values to scatter.
- *  \param last End of the sequence of values to scatter.
- *  \param map Beginning of the sequence of output indices.
- *  \param stencil Beginning of the sequence of predicate values.
- *  \param output Beginning of the destination range.
- *  \param pred Predicate to apply to the stencil values.
- *
- *  \tparam InputIterator1 must be a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a> and \c InputIterator1's \c value_type must be convertible to \c RandomAccessIterator's \c value_type.
- *  \tparam InputIterator2 must be a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a> and \c InputIterator2's \c value_type must be convertible to \c RandomAccessIterator's \c difference_type.
- *  \tparam InputIterator3 must be a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a> and \c InputIterator3's \c value_type must be convertible to \c Predicate's \c argument_type.
- *  \tparam RandomAccessIterator must be a model of <a href="http://www.sgi.com/tech/stl/RandomAccessIterator.html">Random Access iterator</a>.
- *  \tparam Predicate must be a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *
- *  \pre The iterator `result + i` shall not refer to any element referenced by any iterator `j` in the range `[first,last)` for all iterators `i` in the range `[map,map + (last - first))`.
- *
- *  \pre The iterator `result + i` shall not refer to any element referenced by any iterator `j` in the range `[map,map + (last - first))` for all iterators `i` in the range `[map,map + (last - first))`.
- *
- *  \pre The iterator `result + i` shall not refer to any element referenced by any iterator `j` in the range `[stencil,stencil + (last - first))` for all iterators `i` in the range `[map,map + (last - first))`.
- *
- *  \pre The expression `result[*i]` shall be valid for all iterators `i` in the range `[map,map + (last - first))` for which the following condition holds: `pred(*(stencil + i)) != false`.
- *
- *  \code
- *  #include <thrust/scatter.h>
- *
- *  struct is_even
- *  {
- *    __host__ __device__
- *    bool operator()(int x)
- *    {
- *      return (x % 2) == 0;
- *    }
- *  };
- *
- *  ...
- *
- *  int V[8] = {10, 20, 30, 40, 50, 60, 70, 80};
- *  int M[8] = {0, 5, 1, 6, 2, 7, 3, 4};
- *  int S[8] = {2, 1, 2, 1, 2, 1, 2, 1};
- *  int D[8] = {0, 0, 0, 0, 0, 0, 0, 0};
- * 
- *  is_even pred;
- *  thrust::scatter_if(V, V + 8, M, S, D, pred);
- * 
- *  // D contains [10, 30, 50, 70, 0, 0, 0, 0];
- *  \endcode
- *  
- *  \note \p scatter_if is the inverse of thrust::gather_if.
- */
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename RandomAccessIterator,
-         typename Predicate>
-  void scatter_if(InputIterator1 first,
-                  InputIterator1 last,
-                  InputIterator2 map,
-                  InputIterator3 stencil,
-                  RandomAccessIterator output,
-                  Predicate pred);
-
-
-/*! \} // end scattering
- */
-
-
-} // end namespace thrust
-
-#include <thrust/detail/scatter.inl>
-
diff --git a/compat/thrust/sequence.h b/compat/thrust/sequence.h
deleted file mode 100644
index 6c54a5bbf4..0000000000
--- a/compat/thrust/sequence.h
+++ /dev/null
@@ -1,293 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file sequence.h
- *  \brief Fills a range with a sequence of numbers
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/execution_policy.h>
-
-namespace thrust
-{
-
-
-/*! \addtogroup transformations
- *  \{
- */
-
-
-/*! \p sequence fills the range <tt>[first, last)</tt> with a sequence of numbers.
- *
- *  For each iterator \c i in the range <tt>[first, last)</tt>, this version of 
- *  \p sequence performs the assignment <tt>*i =  (i - first)</tt>.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the sequence.
- *  \param last The end of the sequence.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \p ForwardIterator is mutable,
- *          and if \c x and \c y are objects of \c ForwardIterator's \c value_type, then <tt>x + y</tt> is defined,
- *          and if \c T is \p ForwardIterator's \c value_type, then <tt>T(0)</tt> is defined.
- *
- *  The following code snippet demonstrates how to use \p sequence to fill a range
- *  with a sequence of numbers using the \p thrust::host execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/sequence.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  const int N = 10;
- *  int A[N];
- *  thrust::sequence(thrust::host, A, A + 10);
- *  // A is now {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
- *  \endcode
- *
- *  \note Unlike the similar C++ STL function \c std::iota, \p sequence offers no
- *        guarantee on order of execution.
- *
- *  \see http://www.sgi.com/tech/stl/iota.html
- */
-template<typename DerivedPolicy, typename ForwardIterator>
-  void sequence(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                ForwardIterator first,
-                ForwardIterator last);
-
-
-/*! \p sequence fills the range <tt>[first, last)</tt> with a sequence of numbers.
- *
- *  For each iterator \c i in the range <tt>[first, last)</tt>, this version of 
- *  \p sequence performs the assignment <tt>*i =  (i - first)</tt>.
- *
- *  \param first The beginning of the sequence.
- *  \param last The end of the sequence.
- *
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \p ForwardIterator is mutable,
- *          and if \c x and \c y are objects of \c ForwardIterator's \c value_type, then <tt>x + y</tt> is defined,
- *          and if \c T is \p ForwardIterator's \c value_type, then <tt>T(0)</tt> is defined.
- *
- *  The following code snippet demonstrates how to use \p sequence to fill a range
- *  with a sequence of numbers.
- *
- *  \code
- *  #include <thrust/sequence.h>
- *  ...
- *  const int N = 10;
- *  int A[N];
- *  thrust::sequence(A, A + 10);
- *  // A is now {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
- *  \endcode
- *
- *  \note Unlike the similar C++ STL function \c std::iota, \p sequence offers no
- *        guarantee on order of execution.
- *
- *  \see http://www.sgi.com/tech/stl/iota.html
- */
-template<typename ForwardIterator>
-  void sequence(ForwardIterator first,
-                ForwardIterator last);
-
-
-/*! \p sequence fills the range <tt>[first, last)</tt> with a sequence of numbers.
- *
- *  For each iterator \c i in the range <tt>[first, last)</tt>, this version of 
- *  \p sequence performs the assignment <tt>*i =  init + (i - first)</tt>.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the sequence.
- *  \param last The end of the sequence.
- *  \param init The first value of the sequence of numbers.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \p ForwardIterator is mutable,
- *          and if \c x and \c y are objects of \c ForwardIterator's \c value_type, then <tt>x + y</tt> is defined,
- *          and if \c T is \p ForwardIterator's \c value_type, then <tt>T(0)</tt> is defined.
- *  \tparam T is a model of <a href="http://www.sgi.com/tech/stl/Assignable.html">Assignable</a>,
- *          and \p T is convertible to \p ForwardIterator's \c value_type.
- *
- *  The following code snippet demonstrates how to use \p sequence to fill a range
- *  with a sequence of numbers starting from the value 1 using the \p thrust::host execution
- *  policy for parallelization:
- *
- *  \code
- *  #include <thrust/sequence.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  const int N = 10;
- *  int A[N];
- *  thrust::sequence(thrust::host, A, A + 10, 1);
- *  // A is now {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
- *  \endcode
- *
- *  \note Unlike the similar C++ STL function \c std::iota, \p sequence offers no
- *        guarantee on order of execution.
- *
- *  \see http://www.sgi.com/tech/stl/iota.html
- */
-template<typename DerivedPolicy, typename ForwardIterator, typename T>
-  void sequence(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                ForwardIterator first,
-                ForwardIterator last,
-                T init);
-
-
-/*! \p sequence fills the range <tt>[first, last)</tt> with a sequence of numbers.
- *
- *  For each iterator \c i in the range <tt>[first, last)</tt>, this version of 
- *  \p sequence performs the assignment <tt>*i =  init + (i - first)</tt>.
- *
- *  \param first The beginning of the sequence.
- *  \param last The end of the sequence.
- *  \param init The first value of the sequence of numbers.
- *
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \p ForwardIterator is mutable,
- *          and if \c x and \c y are objects of \c ForwardIterator's \c value_type, then <tt>x + y</tt> is defined,
- *          and if \c T is \p ForwardIterator's \c value_type, then <tt>T(0)</tt> is defined.
- *  \tparam T is a model of <a href="http://www.sgi.com/tech/stl/Assignable.html">Assignable</a>,
- *          and \p T is convertible to \p ForwardIterator's \c value_type.
- *
- *  The following code snippet demonstrates how to use \p sequence to fill a range
- *  with a sequence of numbers starting from the value 1.
- *
- *  \code
- *  #include <thrust/sequence.h>
- *  ...
- *  const int N = 10;
- *  int A[N];
- *  thrust::sequence(A, A + 10, 1);
- *  // A is now {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
- *  \endcode
- *
- *  \note Unlike the similar C++ STL function \c std::iota, \p sequence offers no
- *        guarantee on order of execution.
- *
- *  \see http://www.sgi.com/tech/stl/iota.html
- */
-template<typename ForwardIterator, typename T>
-  void sequence(ForwardIterator first,
-                ForwardIterator last,
-                T init);
-
-
-/*! \p sequence fills the range <tt>[first, last)</tt> with a sequence of numbers.
- *
- *  For each iterator \c i in the range <tt>[first, last)</tt>, this version of 
- *  \p sequence performs the assignment <tt>*i =  init + step * (i - first)</tt>.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the sequence.
- *  \param last The end of the sequence.
- *  \param init The first value of the sequence of numbers
- *  \param step The difference between consecutive elements.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \p ForwardIterator is mutable,
- *          and if \c x and \c y are objects of \c ForwardIterator's \c value_type, then <tt>x + y</tt> is defined,
- *          and if \c T is \p ForwardIterator's \c value_type, then <tt>T(0)</tt> is defined.
- *  \tparam T is a model of <a href="http://www.sgi.com/tech/stl/Assignable.html">Assignable</a>,
- *          and \p T is convertible to \p ForwardIterator's \c value_type.
- *
- *  The following code snippet demonstrates how to use \p sequence to fill a range
- *  with a sequence of numbers starting from the value 1 with a step size of 3 using the \p thrust::host
- *  execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/sequence.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  const int N = 10;
- *  int A[N];
- *  thrust::sequence(thrust::host, A, A + 10, 1, 3);
- *  // A is now {1, 4, 7, 10, 13, 16, 19, 22, 25, 28}
- *  \endcode
- *
- *  \note Unlike the similar C++ STL function \c std::iota, \p sequence offers no
- *        guarantee on order of execution.
- *
- *  \see http://www.sgi.com/tech/stl/iota.html
- */
-template<typename DerivedPolicy, typename ForwardIterator, typename T>
-  void sequence(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                ForwardIterator first,
-                ForwardIterator last,
-                T init,
-                T step);
-
-
-/*! \p sequence fills the range <tt>[first, last)</tt> with a sequence of numbers.
- *
- *  For each iterator \c i in the range <tt>[first, last)</tt>, this version of 
- *  \p sequence performs the assignment <tt>*i =  init + step * (i - first)</tt>.
- *
- *  \param first The beginning of the sequence.
- *  \param last The end of the sequence.
- *  \param init The first value of the sequence of numbers
- *  \param step The difference between consecutive elements.
- *
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \p ForwardIterator is mutable,
- *          and if \c x and \c y are objects of \c ForwardIterator's \c value_type, then <tt>x + y</tt> is defined,
- *          and if \c T is \p ForwardIterator's \c value_type, then <tt>T(0)</tt> is defined.
- *  \tparam T is a model of <a href="http://www.sgi.com/tech/stl/Assignable.html">Assignable</a>,
- *          and \p T is convertible to \p ForwardIterator's \c value_type.
- *
- *  The following code snippet demonstrates how to use \p sequence to fill a range
- *  with a sequence of numbers starting from the value 1 with a step size of 3.
- *
- *  \code
- *  #include <thrust/sequence.h>
- *  ...
- *  const int N = 10;
- *  int A[N];
- *  thrust::sequence(A, A + 10, 1, 3);
- *  // A is now {1, 4, 7, 10, 13, 16, 19, 22, 25, 28}
- *  \endcode
- *
- *  \note Unlike the similar C++ STL function \c std::iota, \p sequence offers no
- *        guarantee on order of execution.
- *
- *  \see http://www.sgi.com/tech/stl/iota.html
- */
-template<typename ForwardIterator, typename T>
-  void sequence(ForwardIterator first,
-                ForwardIterator last,
-                T init,
-                T step);
-
-
-/*! \} // end transformations
- */
-
-
-} // end namespace thrust
-
-#include <thrust/detail/sequence.inl>
-
diff --git a/compat/thrust/set_operations.h b/compat/thrust/set_operations.h
deleted file mode 100644
index a7ee624f92..0000000000
--- a/compat/thrust/set_operations.h
+++ /dev/null
@@ -1,2947 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file set_operations.h
- *  \brief Set theoretic operations for sorted ranges
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/execution_policy.h>
-#include <thrust/pair.h>
-
-namespace thrust
-{
-
-
-/*! \addtogroup set_operations Set Operations
- *  \ingroup algorithms
- *  \{
- */
-
-
-/*! \p set_difference constructs a sorted range that is the set difference of the sorted
- *  ranges <tt>[first1, last1)</tt> and <tt>[first2, last2)</tt>. The return value is the
- *  end of the output range.
- *
- *  In the simplest case, \p set_difference performs the "difference" operation from set
- *  theory: the output range contains a copy of every element that is contained in
- *  <tt>[first1, last1)</tt> and not contained in <tt>[first2, last1)</tt>. The general case
- *  is more complicated, because the input ranges may contain duplicate elements.
- *  The generalization is that if <tt>[first1, last1)</tt> contains \c m elements
- *  that are equivalent to each other and if <tt>[first2, last2)</tt> contains \c n
- *  elements that are equivalent to them, the last <tt>max(m-n,0)</tt> elements from
- *  <tt>[first1, last1)</tt> range shall be copied to the output range.
- *
- *  This version of \p set_difference compares elements using \c operator<.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first1 The beginning of the first input range.
- *  \param last1 The end of the first input range.
- *  \param first2 The beginning of the second input range.
- *  \param last2 The end of the second input range.
- *  \param result The beginning of the output range.
- *  \return The end of the output range.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator1 and \p InputIterator2 have the same \c value_type,
- *          \p InputIterator1's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator2 and \p InputIterator1 have the same \c value_type,
- *          \p InputIterator2's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *
- *  \pre The ranges <tt>[first1, last1)</tt> and <tt>[first2, last2)</tt> shall be sorted with respect to <tt>operator<</tt>.
- *  \pre The resulting range shall not overlap with either input range.
- *
- *  The following code snippet demonstrates how to use \p set_difference to compute the
- *  set difference of two sets of integers sorted in ascending order using the \p thrust::host execution
- *  policy for parallelization:
- *
- *  \code
- *  #include <thrust/set_operations.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  int A1[6] = {0, 1, 3, 4, 5, 6, 9};
- *  int A2[5] = {1, 3, 5, 7, 9};
- *
- *  int result[3];
- *
- *  int *result_end = thrust::set_difference(thrust::host, A1, A1 + 6, A2, A2 + 5, result);
- *  // result is now {0, 4, 6}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/set_difference.html
- *  \see \p includes
- *  \see \p set_union
- *  \see \p set_intersection
- *  \see \p set_symmetric_difference
- *  \see \p sort
- *  \see \p is_sorted
- */
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator>
-  OutputIterator set_difference(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                InputIterator1                                              first1,
-                                InputIterator1                                              last1,
-                                InputIterator2                                              first2,
-                                InputIterator2                                              last2,
-                                OutputIterator                                              result);
-
-
-/*! \p set_difference constructs a sorted range that is the set difference of the sorted
- *  ranges <tt>[first1, last1)</tt> and <tt>[first2, last2)</tt>. The return value is the
- *  end of the output range.
- *
- *  In the simplest case, \p set_difference performs the "difference" operation from set
- *  theory: the output range contains a copy of every element that is contained in
- *  <tt>[first1, last1)</tt> and not contained in <tt>[first2, last1)</tt>. The general case
- *  is more complicated, because the input ranges may contain duplicate elements.
- *  The generalization is that if <tt>[first1, last1)</tt> contains \c m elements
- *  that are equivalent to each other and if <tt>[first2, last2)</tt> contains \c n
- *  elements that are equivalent to them, the last <tt>max(m-n,0)</tt> elements from
- *  <tt>[first1, last1)</tt> range shall be copied to the output range.
- *
- *  This version of \p set_difference compares elements using \c operator<.
- *
- *  \param first1 The beginning of the first input range.
- *  \param last1 The end of the first input range.
- *  \param first2 The beginning of the second input range.
- *  \param last2 The end of the second input range.
- *  \param result The beginning of the output range.
- *  \return The end of the output range.
- *
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator1 and \p InputIterator2 have the same \c value_type,
- *          \p InputIterator1's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator2 and \p InputIterator1 have the same \c value_type,
- *          \p InputIterator2's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *
- *  \pre The ranges <tt>[first1, last1)</tt> and <tt>[first2, last2)</tt> shall be sorted with respect to <tt>operator<</tt>.
- *  \pre The resulting range shall not overlap with either input range.
- *
- *  The following code snippet demonstrates how to use \p set_difference to compute the
- *  set difference of two sets of integers sorted in ascending order.
- *
- *  \code
- *  #include <thrust/set_operations.h>
- *  ...
- *  int A1[6] = {0, 1, 3, 4, 5, 6, 9};
- *  int A2[5] = {1, 3, 5, 7, 9};
- *
- *  int result[3];
- *
- *  int *result_end = thrust::set_difference(A1, A1 + 6, A2, A2 + 5, result);
- *  // result is now {0, 4, 6}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/set_difference.html
- *  \see \p includes
- *  \see \p set_union
- *  \see \p set_intersection
- *  \see \p set_symmetric_difference
- *  \see \p sort
- *  \see \p is_sorted
- */
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator>
-  OutputIterator set_difference(InputIterator1 first1,
-                                InputIterator1 last1,
-                                InputIterator2 first2,
-                                InputIterator2 last2,
-                                OutputIterator result);
-
-
-/*! \p set_difference constructs a sorted range that is the set difference of the sorted
- *  ranges <tt>[first1, last1)</tt> and <tt>[first2, last2)</tt>. The return value is the
- *  end of the output range.
- *
- *  In the simplest case, \p set_difference performs the "difference" operation from set
- *  theory: the output range contains a copy of every element that is contained in
- *  <tt>[first1, last1)</tt> and not contained in <tt>[first2, last1)</tt>. The general case
- *  is more complicated, because the input ranges may contain duplicate elements.
- *  The generalization is that if <tt>[first1, last1)</tt> contains \c m elements
- *  that are equivalent to each other and if <tt>[first2, last2)</tt> contains \c n
- *  elements that are equivalent to them, the last <tt>max(m-n,0)</tt> elements from
- *  <tt>[first1, last1)</tt> range shall be copied to the output range.
- *
- *  This version of \p set_difference compares elements using a function object \p comp.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first1 The beginning of the first input range.
- *  \param last1 The end of the first input range.
- *  \param first2 The beginning of the second input range.
- *  \param last2 The end of the second input range.
- *  \param result The beginning of the output range.
- *  \param comp Comparison operator.
- *  \return The end of the output range.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator1's \c value_type is convertable to \p StrictWeakCompare's \c first_argument_type.
- *          and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator2's \c value_type is convertable to \p StrictWeakCompare's \c second_argument_type.
- *          and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam StrictWeakCompare is a model of <a href="http://www.sgi.com/tech/stl/StrictWeakOrdering.html">Strict Weak Ordering</a>.
- *
- *  \pre The ranges <tt>[first1, last1)</tt> and <tt>[first2, last2)</tt> shall be sorted with respect to \p comp.
- *  \pre The resulting range shall not overlap with either input range.
- *
- *  The following code snippet demonstrates how to use \p set_difference to compute the
- *  set difference of two sets of integers sorted in descending order using the \p thrust::host execution
- *  policy for parallelization:
- *
- *  \code
- *  #include <thrust/set_operations.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  int A1[6] = {9, 6, 5, 4, 3, 1, 0};
- *  int A2[5] = {9, 7, 5, 3, 1};
- *
- *  int result[3];
- *
- *  int *result_end = thrust::set_difference(thrust::host, A1, A1 + 6, A2, A2 + 5, result, thrust::greater<int>());
- *  // result is now {6, 4, 0}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/set_difference.html
- *  \see \p includes
- *  \see \p set_union
- *  \see \p set_intersection
- *  \see \p set_symmetric_difference
- *  \see \p sort
- *  \see \p is_sorted
- */
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename StrictWeakCompare>
-  OutputIterator set_difference(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                InputIterator1                                              first1,
-                                InputIterator1                                              last1,
-                                InputIterator2                                              first2,
-                                InputIterator2                                              last2,
-                                OutputIterator                                              result,
-                                StrictWeakCompare                                           comp);
-
-
-/*! \p set_difference constructs a sorted range that is the set difference of the sorted
- *  ranges <tt>[first1, last1)</tt> and <tt>[first2, last2)</tt>. The return value is the
- *  end of the output range.
- *
- *  In the simplest case, \p set_difference performs the "difference" operation from set
- *  theory: the output range contains a copy of every element that is contained in
- *  <tt>[first1, last1)</tt> and not contained in <tt>[first2, last1)</tt>. The general case
- *  is more complicated, because the input ranges may contain duplicate elements.
- *  The generalization is that if <tt>[first1, last1)</tt> contains \c m elements
- *  that are equivalent to each other and if <tt>[first2, last2)</tt> contains \c n
- *  elements that are equivalent to them, the last <tt>max(m-n,0)</tt> elements from
- *  <tt>[first1, last1)</tt> range shall be copied to the output range.
- *
- *  This version of \p set_difference compares elements using a function object \p comp.
- *
- *  \param first1 The beginning of the first input range.
- *  \param last1 The end of the first input range.
- *  \param first2 The beginning of the second input range.
- *  \param last2 The end of the second input range.
- *  \param result The beginning of the output range.
- *  \param comp Comparison operator.
- *  \return The end of the output range.
- *
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator1's \c value_type is convertable to \p StrictWeakCompare's \c first_argument_type.
- *          and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator2's \c value_type is convertable to \p StrictWeakCompare's \c second_argument_type.
- *          and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam StrictWeakCompare is a model of <a href="http://www.sgi.com/tech/stl/StrictWeakOrdering.html">Strict Weak Ordering</a>.
- *
- *  \pre The ranges <tt>[first1, last1)</tt> and <tt>[first2, last2)</tt> shall be sorted with respect to \p comp.
- *  \pre The resulting range shall not overlap with either input range.
- *
- *  The following code snippet demonstrates how to use \p set_difference to compute the
- *  set difference of two sets of integers sorted in descending order.
- *
- *  \code
- *  #include <thrust/set_operations.h>
- *  #include <thrust/functional.h>
- *  ...
- *  int A1[6] = {9, 6, 5, 4, 3, 1, 0};
- *  int A2[5] = {9, 7, 5, 3, 1};
- *
- *  int result[3];
- *
- *  int *result_end = thrust::set_difference(A1, A1 + 6, A2, A2 + 5, result, thrust::greater<int>());
- *  // result is now {6, 4, 0}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/set_difference.html
- *  \see \p includes
- *  \see \p set_union
- *  \see \p set_intersection
- *  \see \p set_symmetric_difference
- *  \see \p sort
- *  \see \p is_sorted
- */
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename StrictWeakCompare>
-  OutputIterator set_difference(InputIterator1 first1,
-                                InputIterator1 last1,
-                                InputIterator2 first2,
-                                InputIterator2 last2,
-                                OutputIterator result,
-                                StrictWeakCompare comp);
-
-
-/*! \p set_intersection constructs a sorted range that is the
- *  intersection of sorted ranges <tt>[first1, last1)</tt> and
- *  <tt>[first2, last2)</tt>. The return value is the end of the
- *  output range.
- *
- *  In the simplest case, \p set_intersection performs the
- *  "intersection" operation from set theory: the output range
- *  contains a copy of every element that is contained in both
- *  <tt>[first1, last1)</tt> and <tt>[first2, last2)</tt>. The
- *  general case is more complicated, because the input ranges may
- *  contain duplicate elements. The generalization is that if a value
- *  appears \c m times in <tt>[first1, last1)</tt> and \c n times in
- *  <tt>[first2, last2)</tt> (where \c m may be zero), then it
- *  appears <tt>min(m,n)</tt> times in the output range.
- *  \p set_intersection is stable, meaning both that elements are
- *  copied from the first range rather than the second, and that the
- *  relative order of elements in the output range is the same as in
- *  the first input range.
- *
- *  This version of \p set_intersection compares objects using
- *  \c operator<.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first1 The beginning of the first input range.
- *  \param last1 The end of the first input range.
- *  \param first2 The beginning of the second input range.
- *  \param last2 The end of the second input range.
- *  \param result The beginning of the output range.
- *  \return The end of the output range.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator1 and \p InputIterator2 have the same \c value_type,
- *          \p InputIterator1's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator2 and \p InputIterator1 have the same \c value_type,
- *          \p InputIterator2's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *
- *  \pre The ranges <tt>[first1, last1)</tt> and <tt>[first2, last2)</tt> shall be sorted with respect to <tt>operator<</tt>.
- *  \pre The resulting range shall not overlap with either input range.
- *
- *  The following code snippet demonstrates how to use \p set_intersection to compute the
- *  set intersection of two sets of integers sorted in ascending order using the \p thrust::host execution
- *  policy for parallelization:
- *
- *  \code
- *  #include <thrust/set_operations.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  int A1[6] = {1, 3, 5, 7, 9, 11};
- *  int A2[7] = {1, 1, 2, 3, 5,  8, 13};
- *
- *  int result[7];
- *
- *  int *result_end = thrust::set_intersection(thrust::host, A1, A1 + 6, A2, A2 + 7, result);
- *  // result is now {1, 3, 5}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/set_intersection.html
- *  \see \p includes
- *  \see \p set_union
- *  \see \p set_difference
- *  \see \p set_symmetric_difference
- *  \see \p sort
- *  \see \p is_sorted
- */
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator>
-  OutputIterator set_intersection(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                  InputIterator1                                              first1,
-                                  InputIterator1                                              last1,
-                                  InputIterator2                                              first2,
-                                  InputIterator2                                              last2,
-                                  OutputIterator                                              result);
-
-
-/*! \p set_intersection constructs a sorted range that is the
- *  intersection of sorted ranges <tt>[first1, last1)</tt> and
- *  <tt>[first2, last2)</tt>. The return value is the end of the
- *  output range.
- *
- *  In the simplest case, \p set_intersection performs the
- *  "intersection" operation from set theory: the output range
- *  contains a copy of every element that is contained in both
- *  <tt>[first1, last1)</tt> and <tt>[first2, last2)</tt>. The
- *  general case is more complicated, because the input ranges may
- *  contain duplicate elements. The generalization is that if a value
- *  appears \c m times in <tt>[first1, last1)</tt> and \c n times in
- *  <tt>[first2, last2)</tt> (where \c m may be zero), then it
- *  appears <tt>min(m,n)</tt> times in the output range.
- *  \p set_intersection is stable, meaning both that elements are
- *  copied from the first range rather than the second, and that the
- *  relative order of elements in the output range is the same as in
- *  the first input range.
- *
- *  This version of \p set_intersection compares objects using
- *  \c operator<.
- *
- *  \param first1 The beginning of the first input range.
- *  \param last1 The end of the first input range.
- *  \param first2 The beginning of the second input range.
- *  \param last2 The end of the second input range.
- *  \param result The beginning of the output range.
- *  \return The end of the output range.
- *
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator1 and \p InputIterator2 have the same \c value_type,
- *          \p InputIterator1's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator2 and \p InputIterator1 have the same \c value_type,
- *          \p InputIterator2's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *
- *  \pre The ranges <tt>[first1, last1)</tt> and <tt>[first2, last2)</tt> shall be sorted with respect to <tt>operator<</tt>.
- *  \pre The resulting range shall not overlap with either input range.
- *
- *  The following code snippet demonstrates how to use \p set_intersection to compute the
- *  set intersection of two sets of integers sorted in ascending order.
- *
- *  \code
- *  #include <thrust/set_operations.h>
- *  ...
- *  int A1[6] = {1, 3, 5, 7, 9, 11};
- *  int A2[7] = {1, 1, 2, 3, 5,  8, 13};
- *
- *  int result[7];
- *
- *  int *result_end = thrust::set_intersection(A1, A1 + 6, A2, A2 + 7, result);
- *  // result is now {1, 3, 5}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/set_intersection.html
- *  \see \p includes
- *  \see \p set_union
- *  \see \p set_difference
- *  \see \p set_symmetric_difference
- *  \see \p sort
- *  \see \p is_sorted
- */
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator>
-  OutputIterator set_intersection(InputIterator1 first1,
-                                  InputIterator1 last1,
-                                  InputIterator2 first2,
-                                  InputIterator2 last2,
-                                  OutputIterator result);
-
-
-/*! \p set_intersection constructs a sorted range that is the
- *  intersection of sorted ranges <tt>[first1, last1)</tt> and
- *  <tt>[first2, last2)</tt>. The return value is the end of the
- *  output range.
- *
- *  In the simplest case, \p set_intersection performs the
- *  "intersection" operation from set theory: the output range
- *  contains a copy of every element that is contained in both
- *  <tt>[first1, last1)</tt> and <tt>[first2, last2)</tt>. The
- *  general case is more complicated, because the input ranges may
- *  contain duplicate elements. The generalization is that if a value
- *  appears \c m times in <tt>[first1, last1)</tt> and \c n times in
- *  <tt>[first2, last2)</tt> (where \c m may be zero), then it
- *  appears <tt>min(m,n)</tt> times in the output range.
- *  \p set_intersection is stable, meaning both that elements are
- *  copied from the first range rather than the second, and that the
- *  relative order of elements in the output range is the same as in
- *  the first input range.
- *
- *  This version of \p set_intersection compares elements using a function object \p comp.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first1 The beginning of the first input range.
- *  \param last1 The end of the first input range.
- *  \param first2 The beginning of the second input range.
- *  \param last2 The end of the second input range.
- *  \param result The beginning of the output range.
- *  \param comp Comparison operator.
- *  \return The end of the output range.
- *
- *  \pre The ranges <tt>[first1, last1)</tt> and <tt>[first2, last2)</tt> shall be sorted with respect to \p comp.
- *  \pre The resulting range shall not overlap with either input range.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator1 and \p InputIterator2 have the same \c value_type,
- *          \p InputIterator1's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator2 and \p InputIterator1 have the same \c value_type,
- *          \p InputIterator2's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam StrictWeakCompare is a model of <a href="http://www.sgi.com/tech/stl/StrictWeakOrdering.html">Strict Weak Ordering</a>.
- *
- *  The following code snippet demonstrates how to use \p set_intersection to compute
- *  the set intersection of sets of integers sorted in descending order using the \p thrust::host execution
- *  policy for parallelization:
- *
- *  \code
- *  #include <thrust/set_operations.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  int A1[6] = {11, 9, 7, 5, 3, 1};
- *  int A2[7] = {13, 8, 5, 3, 2,  1, 1};
- *
- *  int result[3];
- *
- *  int *result_end = thrust::set_intersection(thrust::host, A1, A1 + 6, A2, A2 + 7, result, thrust::greater<int>());
- *  // result is now {5, 3, 1}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/set_intersection.html
- *  \see \p includes
- *  \see \p set_union
- *  \see \p set_difference
- *  \see \p set_symmetric_difference
- *  \see \p sort
- *  \see \p is_sorted
- */
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename StrictWeakCompare>
-  OutputIterator set_intersection(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                  InputIterator1                                              first1,
-                                  InputIterator1                                              last1,
-                                  InputIterator2                                              first2,
-                                  InputIterator2                                              last2,
-                                  OutputIterator                                              result,
-                                  StrictWeakCompare                                           comp);
-
-
-/*! \p set_intersection constructs a sorted range that is the
- *  intersection of sorted ranges <tt>[first1, last1)</tt> and
- *  <tt>[first2, last2)</tt>. The return value is the end of the
- *  output range.
- *
- *  In the simplest case, \p set_intersection performs the
- *  "intersection" operation from set theory: the output range
- *  contains a copy of every element that is contained in both
- *  <tt>[first1, last1)</tt> and <tt>[first2, last2)</tt>. The
- *  general case is more complicated, because the input ranges may
- *  contain duplicate elements. The generalization is that if a value
- *  appears \c m times in <tt>[first1, last1)</tt> and \c n times in
- *  <tt>[first2, last2)</tt> (where \c m may be zero), then it
- *  appears <tt>min(m,n)</tt> times in the output range.
- *  \p set_intersection is stable, meaning both that elements are
- *  copied from the first range rather than the second, and that the
- *  relative order of elements in the output range is the same as in
- *  the first input range.
- *
- *  This version of \p set_intersection compares elements using a function object \p comp.
- *
- *  \param first1 The beginning of the first input range.
- *  \param last1 The end of the first input range.
- *  \param first2 The beginning of the second input range.
- *  \param last2 The end of the second input range.
- *  \param result The beginning of the output range.
- *  \param comp Comparison operator.
- *  \return The end of the output range.
- *
- *  \pre The ranges <tt>[first1, last1)</tt> and <tt>[first2, last2)</tt> shall be sorted with respect to \p comp.
- *  \pre The resulting range shall not overlap with either input range.
- *
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator1 and \p InputIterator2 have the same \c value_type,
- *          \p InputIterator1's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator2 and \p InputIterator1 have the same \c value_type,
- *          \p InputIterator2's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam StrictWeakCompare is a model of <a href="http://www.sgi.com/tech/stl/StrictWeakOrdering.html">Strict Weak Ordering</a>.
- *
- *  The following code snippet demonstrates how to use \p set_intersection to compute
- *  the set intersection of sets of integers sorted in descending order.
- *
- *  \code
- *  #include <thrust/set_operations.h>
- *  #include <thrust/functional.h>
- *  ...
- *  int A1[6] = {11, 9, 7, 5, 3, 1};
- *  int A2[7] = {13, 8, 5, 3, 2,  1, 1};
- *
- *  int result[3];
- *
- *  int *result_end = thrust::set_intersection(A1, A1 + 6, A2, A2 + 7, result, thrust::greater<int>());
- *  // result is now {5, 3, 1}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/set_intersection.html
- *  \see \p includes
- *  \see \p set_union
- *  \see \p set_difference
- *  \see \p set_symmetric_difference
- *  \see \p sort
- *  \see \p is_sorted
- */
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename StrictWeakCompare>
-  OutputIterator set_intersection(InputIterator1 first1,
-                                  InputIterator1 last1,
-                                  InputIterator2 first2,
-                                  InputIterator2 last2,
-                                  OutputIterator result,
-                                  StrictWeakCompare comp);
-
-
-/*! \p set_symmetric_difference constructs a sorted range that is the set symmetric
- *  difference of the sorted ranges <tt>[first1, last1)</tt> and <tt>[first2, last2)</tt>.
- *  The return value is the end of the output range.
- *
- *  In the simplest case, \p set_symmetric_difference performs a set theoretic calculation:
- *  it constructs the union of the two sets A - B and B - A, where A and B are the two
- *  input ranges. That is, the output range contains a copy of every element that is
- *  contained in <tt>[first1, last1)</tt> but not <tt>[first2, last2)</tt>, and a copy of
- *  every element that is contained in <tt>[first2, last2)</tt> but not <tt>[first1, last1)</tt>.
- *  The general case is more complicated, because the input ranges may contain duplicate elements.
- *  The generalization is that if <tt>[first1, last1)</tt> contains \c m elements that are
- *  equivalent to each other and <tt>[first2, last2)</tt> contains \c n elements that are
- *  equivalent to them, then <tt>|m - n|</tt> of those elements shall be copied to the output
- *  range: the last <tt>m - n</tt> elements from <tt>[first1, last1)</tt> if <tt>m > n</tt>, and
- *  the last <tt>n - m</tt> of these elements from <tt>[first2, last2)</tt> if <tt>m < n</tt>.
- *
- *  This version of \p set_symmetric_difference compares elements using \c operator<.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first1 The beginning of the first input range.
- *  \param last1 The end of the first input range.
- *  \param first2 The beginning of the second input range.
- *  \param last2 The end of the second input range.
- *  \param result The beginning of the output range.
- *  \return The end of the output range.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator1 and \p InputIterator2 have the same \c value_type,
- *          \p InputIterator1's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator2 and \p InputIterator1 have the same \c value_type,
- *          \p InputIterator2's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *
- *  \pre The ranges <tt>[first1, last1)</tt> and <tt>[first2, last2)</tt> shall be sorted with respect to <tt>operator<</tt>.
- *  \pre The resulting range shall not overlap with either input range.
- *
- *  The following code snippet demonstrates how to use \p set_symmetric_difference to compute
- *  the symmetric difference of two sets of integers sorted in ascending order using the \p thrust::host
- *  execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/set_operations.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  int A1[7] = {0, 1, 2, 2, 4, 6, 7};
- *  int A2[5] = {1, 1, 2, 5, 8};
- *
- *  int result[8];
- *
- *  int *result_end = thrust::set_symmetric_difference(thrust::host, A1, A1 + 7, A2, A2 + 5, result);
- *  // result = {0, 1, 2, 4, 5, 6, 7, 8}
- *  // (one surplus 1 comes from A2 and one surplus 2 comes from A1, per the |m - n| rule)
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/set_symmetric_difference.html
- *  \see \p merge
- *  \see \p includes
- *  \see \p set_difference
- *  \see \p set_union
- *  \see \p set_intersection
- *  \see \p sort
- *  \see \p is_sorted
- */
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator>
-  OutputIterator set_symmetric_difference(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                          InputIterator1                                              first1,
-                                          InputIterator1                                              last1,
-                                          InputIterator2                                              first2,
-                                          InputIterator2                                              last2,
-                                          OutputIterator                                              result);
-
-
-/*! \p set_symmetric_difference constructs a sorted range that is the set symmetric
- *  difference of the sorted ranges <tt>[first1, last1)</tt> and <tt>[first2, last2)</tt>.
- *  The return value is the end of the output range.
- *
- *  In the simplest case, \p set_symmetric_difference performs a set theoretic calculation:
- *  it constructs the union of the two sets A - B and B - A, where A and B are the two
- *  input ranges. That is, the output range contains a copy of every element that is
- *  contained in <tt>[first1, last1)</tt> but not <tt>[first2, last2)</tt>, and a copy of
- *  every element that is contained in <tt>[first2, last2)</tt> but not <tt>[first1, last1)</tt>.
- *  The general case is more complicated, because the input ranges may contain duplicate elements.
- *  The generalization is that if <tt>[first1, last1)</tt> contains \c m elements that are
- *  equivalent to each other and <tt>[first2, last2)</tt> contains \c n elements that are
- *  equivalent to them, then <tt>|m - n|</tt> of those elements shall be copied to the output
- *  range: the last <tt>m - n</tt> elements from <tt>[first1, last1)</tt> if <tt>m > n</tt>, and
- *  the last <tt>n - m</tt> of these elements from <tt>[first2, last2)</tt> if <tt>m < n</tt>.
- *
- *  This version of \p set_symmetric_difference compares elements using \c operator<.
- *
- *  \param first1 The beginning of the first input range.
- *  \param last1 The end of the first input range.
- *  \param first2 The beginning of the second input range.
- *  \param last2 The end of the second input range.
- *  \param result The beginning of the output range.
- *  \return The end of the output range.
- *
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator1 and \p InputIterator2 have the same \c value_type,
- *          \p InputIterator1's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator2 and \p InputIterator1 have the same \c value_type,
- *          \p InputIterator2's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *
- *  \pre The ranges <tt>[first1, last1)</tt> and <tt>[first2, last2)</tt> shall be sorted with respect to <tt>operator<</tt>.
- *  \pre The resulting range shall not overlap with either input range.
- *
- *  The following code snippet demonstrates how to use \p set_symmetric_difference to compute
- *  the symmetric difference of two sets of integers sorted in ascending order.
- *
- *  \code
- *  #include <thrust/set_operations.h>
- *  ...
- *  int A1[7] = {0, 1, 2, 2, 4, 6, 7};
- *  int A2[5] = {1, 1, 2, 5, 8};
- *
- *  int result[8];
- *
- *  int *result_end = thrust::set_symmetric_difference(A1, A1 + 7, A2, A2 + 5, result);
- *  // result = {0, 1, 2, 4, 5, 6, 7, 8}
- *  // (one surplus 1 comes from A2 and one surplus 2 comes from A1, per the |m - n| rule)
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/set_symmetric_difference.html
- *  \see \p merge
- *  \see \p includes
- *  \see \p set_difference
- *  \see \p set_union
- *  \see \p set_intersection
- *  \see \p sort
- *  \see \p is_sorted
- */
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator>
-  OutputIterator set_symmetric_difference(InputIterator1 first1,
-                                          InputIterator1 last1,
-                                          InputIterator2 first2,
-                                          InputIterator2 last2,
-                                          OutputIterator result);
-
-
-/*! \p set_symmetric_difference constructs a sorted range that is the set symmetric
- *  difference of the sorted ranges <tt>[first1, last1)</tt> and <tt>[first2, last2)</tt>.
- *  The return value is the end of the output range.
- *
- *  In the simplest case, \p set_symmetric_difference performs a set theoretic calculation:
- *  it constructs the union of the two sets A - B and B - A, where A and B are the two
- *  input ranges. That is, the output range contains a copy of every element that is
- *  contained in <tt>[first1, last1)</tt> but not <tt>[first2, last2)</tt>, and a copy of
- *  every element that is contained in <tt>[first2, last2)</tt> but not <tt>[first1, last1)</tt>.
- *  The general case is more complicated, because the input ranges may contain duplicate elements.
- *  The generalization is that if <tt>[first1, last1)</tt> contains \c m elements that are
- *  equivalent to each other and <tt>[first2, last2)</tt> contains \c n elements that are
- *  equivalent to them, then <tt>|m - n|</tt> of those elements shall be copied to the output
- *  range: the last <tt>m - n</tt> elements from <tt>[first1, last1)</tt> if <tt>m > n</tt>, and
- *  the last <tt>n - m</tt> of these elements from <tt>[first2, last2)</tt> if <tt>m < n</tt>.
- *
- *  This version of \p set_symmetric_difference compares elements using a function object \p comp.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first1 The beginning of the first input range.
- *  \param last1 The end of the first input range.
- *  \param first2 The beginning of the second input range.
- *  \param last2 The end of the second input range.
- *  \param result The beginning of the output range.
- *  \param comp Comparison operator.
- *  \return The end of the output range.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator1 and \p InputIterator2 have the same \c value_type,
- *          \p InputIterator1's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator2 and \p InputIterator1 have the same \c value_type,
- *          \p InputIterator2's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam StrictWeakCompare is a model of <a href="http://www.sgi.com/tech/stl/StrictWeakOrdering.html">Strict Weak Ordering</a>.
- *
- *  \pre The ranges <tt>[first1, last1)</tt> and <tt>[first2, last2)</tt> shall be sorted with respect to \p comp.
- *  \pre The resulting range shall not overlap with either input range.
- *
- *  The following code snippet demonstrates how to use \p set_symmetric_difference to compute
- *  the symmetric difference of two sets of integers sorted in descending order using the \p thrust::host
- *  execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/set_operations.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  int A1[7] = {7, 6, 4, 2, 2, 1, 0};
- *  int A2[5] = {8, 5, 2, 1, 1};
- *
- *  int result[8];
- *
- *  int *result_end = thrust::set_symmetric_difference(thrust::host, A1, A1 + 7, A2, A2 + 5, result, thrust::greater<int>());
- *  // result = {8, 7, 6, 5, 4, 2, 1, 0}
- *  // (one surplus 2 comes from A1 and one surplus 1 comes from A2, per the |m - n| rule)
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/set_symmetric_difference.html
- *  \see \p merge
- *  \see \p includes
- *  \see \p set_difference
- *  \see \p set_union
- *  \see \p set_intersection
- *  \see \p sort
- *  \see \p is_sorted
- */
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename StrictWeakCompare>
-  OutputIterator set_symmetric_difference(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                          InputIterator1                                              first1,
-                                          InputIterator1                                              last1,
-                                          InputIterator2                                              first2,
-                                          InputIterator2                                              last2,
-                                          OutputIterator                                              result,
-                                          StrictWeakCompare                                           comp);
-
-
-/*! \p set_symmetric_difference constructs a sorted range that is the set symmetric
- *  difference of the sorted ranges <tt>[first1, last1)</tt> and <tt>[first2, last2)</tt>.
- *  The return value is the end of the output range.
- *
- *  In the simplest case, \p set_symmetric_difference performs a set theoretic calculation:
- *  it constructs the union of the two sets A - B and B - A, where A and B are the two
- *  input ranges. That is, the output range contains a copy of every element that is
- *  contained in <tt>[first1, last1)</tt> but not <tt>[first2, last2)</tt>, and a copy of
- *  every element that is contained in <tt>[first2, last2)</tt> but not <tt>[first1, last1)</tt>.
- *  The general case is more complicated, because the input ranges may contain duplicate elements.
- *  The generalization is that if <tt>[first1, last1)</tt> contains \c m elements that are
- *  equivalent to each other and <tt>[first2, last2)</tt> contains \c n elements that are
- *  equivalent to them, then <tt>|m - n|</tt> of those elements shall be copied to the output
- *  range: the last <tt>m - n</tt> elements from <tt>[first1, last1)</tt> if <tt>m > n</tt>, and
- *  the last <tt>n - m</tt> of these elements from <tt>[first2, last2)</tt> if <tt>m < n</tt>.
- *
- *  This version of \p set_symmetric_difference compares elements using a function object \p comp.
- *
- *  \param first1 The beginning of the first input range.
- *  \param last1 The end of the first input range.
- *  \param first2 The beginning of the second input range.
- *  \param last2 The end of the second input range.
- *  \param result The beginning of the output range.
- *  \param comp Comparison operator.
- *  \return The end of the output range.
- *
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator1's \c value_type is convertible to \p StrictWeakCompare's \c first_argument_type,
- *          and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator2's \c value_type is convertible to \p StrictWeakCompare's \c second_argument_type,
- *          and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam StrictWeakCompare is a model of <a href="http://www.sgi.com/tech/stl/StrictWeakOrdering.html">Strict Weak Ordering</a>.
- *
- *  \pre The ranges <tt>[first1, last1)</tt> and <tt>[first2, last2)</tt> shall be sorted with respect to \p comp.
- *  \pre The resulting range shall not overlap with either input range.
- *
- *  The following code snippet demonstrates how to use \p set_symmetric_difference to compute
- *  the symmetric difference of two sets of integers sorted in descending order.
- *
- *  \code
- *  #include <thrust/set_operations.h>
- *  #include <thrust/functional.h>
- *  ...
- *  int A1[7] = {7, 6, 4, 2, 2, 1, 0};
- *  int A2[5] = {8, 5, 2, 1, 1};
- *
- *  int result[8];
- *
- *  int *result_end = thrust::set_symmetric_difference(A1, A1 + 7, A2, A2 + 5, result, thrust::greater<int>());
- *  // result = {8, 7, 6, 5, 4, 2, 1, 0}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/set_symmetric_difference.html
- *  \see \p merge
- *  \see \p includes
- *  \see \p set_difference
- *  \see \p set_union
- *  \see \p set_intersection
- *  \see \p sort
- *  \see \p is_sorted
- */
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename StrictWeakCompare>
-  OutputIterator set_symmetric_difference(InputIterator1 first1,
-                                          InputIterator1 last1,
-                                          InputIterator2 first2,
-                                          InputIterator2 last2,
-                                          OutputIterator result,
-                                          StrictWeakCompare comp);
-
-
-/*! \p set_union constructs a sorted range that is the union of the sorted ranges
- *  <tt>[first1, last1)</tt> and <tt>[first2, last2)</tt>. The return value is the
- *  end of the output range.
- *
- *  In the simplest case, \p set_union performs the "union" operation from set
- *  theory: the output range contains a copy of every element that is contained in
- *  <tt>[first1, last1)</tt>, <tt>[first2, last2)</tt>, or both. The general case
- *  is more complicated, because the input ranges may contain duplicate elements.
- *  The generalization is that if <tt>[first1, last1)</tt> contains \c m elements
- *  that are equivalent to each other and if <tt>[first2, last2)</tt> contains \c n
- *  elements that are equivalent to them, then all \c m elements from the first
- *  range shall be copied to the output range, in order, and then <tt>max(n - m, 0)</tt>
- *  elements from the second range shall be copied to the output, in order.
- *
- *  This version of \p set_union compares elements using \c operator<.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first1 The beginning of the first input range.
- *  \param last1 The end of the first input range.
- *  \param first2 The beginning of the second input range.
- *  \param last2 The end of the second input range.
- *  \param result The beginning of the output range.
- *  \return The end of the output range.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator1 and \p InputIterator2 have the same \c value_type,
- *          \p InputIterator1's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator2 and \p InputIterator1 have the same \c value_type,
- *          \p InputIterator2's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *
- *  \pre The ranges <tt>[first1, last1)</tt> and <tt>[first2, last2)</tt> shall be sorted with respect to <tt>operator<</tt>.
- *  \pre The resulting range shall not overlap with either input range.
- *
- *  The following code snippet demonstrates how to use \p set_union to compute the union of
- *  two sets of integers sorted in ascending order using the \p thrust::host execution policy for
- *  parallelization:
- *
- *  \code
- *  #include <thrust/set_operations.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  int A1[7] = {0, 2, 4, 6, 8, 10, 12};
- *  int A2[5] = {1, 3, 5, 7, 9};
- *
- *  int result[12];
- *
- *  int *result_end = thrust::set_union(thrust::host, A1, A1 + 7, A2, A2 + 5, result);
- *  // result = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/set_union.html
- *  \see \p merge
- *  \see \p includes
- *  \see \p set_difference
- *  \see \p set_intersection
- *  \see \p set_symmetric_difference
- *  \see \p sort
- *  \see \p is_sorted
- */
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator>
-  OutputIterator set_union(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                           InputIterator1                                              first1,
-                           InputIterator1                                              last1,
-                           InputIterator2                                              first2,
-                           InputIterator2                                              last2,
-                           OutputIterator                                              result);
-
-
-/*! \p set_union constructs a sorted range that is the union of the sorted ranges
- *  <tt>[first1, last1)</tt> and <tt>[first2, last2)</tt>. The return value is the
- *  end of the output range.
- *
- *  In the simplest case, \p set_union performs the "union" operation from set
- *  theory: the output range contains a copy of every element that is contained in
- *  <tt>[first1, last1)</tt>, <tt>[first2, last2)</tt>, or both. The general case
- *  is more complicated, because the input ranges may contain duplicate elements.
- *  The generalization is that if <tt>[first1, last1)</tt> contains \c m elements
- *  that are equivalent to each other and if <tt>[first2, last2)</tt> contains \c n
- *  elements that are equivalent to them, then all \c m elements from the first
- *  range shall be copied to the output range, in order, and then <tt>max(n - m, 0)</tt>
- *  elements from the second range shall be copied to the output, in order.
- *
- *  This version of \p set_union compares elements using \c operator<.
- *
- *  \param first1 The beginning of the first input range.
- *  \param last1 The end of the first input range.
- *  \param first2 The beginning of the second input range.
- *  \param last2 The end of the second input range.
- *  \param result The beginning of the output range.
- *  \return The end of the output range.
- *
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator1 and \p InputIterator2 have the same \c value_type,
- *          \p InputIterator1's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator2 and \p InputIterator1 have the same \c value_type,
- *          \p InputIterator2's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *
- *  \pre The ranges <tt>[first1, last1)</tt> and <tt>[first2, last2)</tt> shall be sorted with respect to <tt>operator<</tt>.
- *  \pre The resulting range shall not overlap with either input range.
- *
- *  The following code snippet demonstrates how to use \p set_union to compute the union of
- *  two sets of integers sorted in ascending order.
- *
- *  \code
- *  #include <thrust/set_operations.h>
- *  ...
- *  int A1[7] = {0, 2, 4, 6, 8, 10, 12};
- *  int A2[5] = {1, 3, 5, 7, 9};
- *
- *  int result[12];
- *
- *  int *result_end = thrust::set_union(A1, A1 + 7, A2, A2 + 5, result);
- *  // result = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/set_union.html
- *  \see \p merge
- *  \see \p includes
- *  \see \p set_difference
- *  \see \p set_intersection
- *  \see \p set_symmetric_difference
- *  \see \p sort
- *  \see \p is_sorted
- */
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator>
-  OutputIterator set_union(InputIterator1 first1,
-                           InputIterator1 last1,
-                           InputIterator2 first2,
-                           InputIterator2 last2,
-                           OutputIterator result);
-
-
-/*! \p set_union constructs a sorted range that is the union of the sorted ranges
- *  <tt>[first1, last1)</tt> and <tt>[first2, last2)</tt>. The return value is the
- *  end of the output range.
- *
- *  In the simplest case, \p set_union performs the "union" operation from set
- *  theory: the output range contains a copy of every element that is contained in
- *  <tt>[first1, last1)</tt>, <tt>[first2, last2)</tt>, or both. The general case
- *  is more complicated, because the input ranges may contain duplicate elements.
- *  The generalization is that if <tt>[first1, last1)</tt> contains \c m elements
- *  that are equivalent to each other and if <tt>[first2, last2)</tt> contains \c n
- *  elements that are equivalent to them, then all \c m elements from the first
- *  range shall be copied to the output range, in order, and then <tt>max(n - m, 0)</tt>
- *  elements from the second range shall be copied to the output, in order.
- *
- *  This version of \p set_union compares elements using a function object \p comp.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first1 The beginning of the first input range.
- *  \param last1 The end of the first input range.
- *  \param first2 The beginning of the second input range.
- *  \param last2 The end of the second input range.
- *  \param result The beginning of the output range.
- *  \param comp Comparison operator.
- *  \return The end of the output range.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator1's \c value_type is convertible to \p StrictWeakCompare's \c first_argument_type,
- *          and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator2's \c value_type is convertible to \p StrictWeakCompare's \c second_argument_type,
- *          and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam StrictWeakCompare is a model of <a href="http://www.sgi.com/tech/stl/StrictWeakOrdering.html">Strict Weak Ordering</a>.
- *
- *  \pre The ranges <tt>[first1, last1)</tt> and <tt>[first2, last2)</tt> shall be sorted with respect to \p comp.
- *  \pre The resulting range shall not overlap with either input range.
- *
- *  The following code snippet demonstrates how to use \p set_union to compute the union of
- *  two sets of integers sorted in descending order using the \p thrust::host execution policy for
- *  parallelization:
- *
- *  \code
- *  #include <thrust/set_operations.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  int A1[7] = {12, 10, 8, 6, 4, 2, 0};
- *  int A2[5] = {9, 7, 5, 3, 1};
- *
- *  int result[12];
- *
- *  int *result_end = thrust::set_union(thrust::host, A1, A1 + 7, A2, A2 + 5, result, thrust::greater<int>());
- *  // result = {12, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/set_union.html
- *  \see \p merge
- *  \see \p includes
- *  \see \p set_difference
- *  \see \p set_intersection
- *  \see \p set_symmetric_difference
- *  \see \p sort
- *  \see \p is_sorted
- */
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename StrictWeakCompare>
-  OutputIterator set_union(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                           InputIterator1                                              first1,
-                           InputIterator1                                              last1,
-                           InputIterator2                                              first2,
-                           InputIterator2                                              last2,
-                           OutputIterator                                              result,
-                           StrictWeakCompare                                           comp);
-
-
-/*! \p set_union constructs a sorted range that is the union of the sorted ranges
- *  <tt>[first1, last1)</tt> and <tt>[first2, last2)</tt>. The return value is the
- *  end of the output range.
- *
- *  In the simplest case, \p set_union performs the "union" operation from set
- *  theory: the output range contains a copy of every element that is contained in
- *  <tt>[first1, last1)</tt>, <tt>[first2, last2)</tt>, or both. The general case
- *  is more complicated, because the input ranges may contain duplicate elements.
- *  The generalization is that if <tt>[first1, last1)</tt> contains \c m elements
- *  that are equivalent to each other and if <tt>[first2, last2)</tt> contains \c n
- *  elements that are equivalent to them, then all \c m elements from the first
- *  range shall be copied to the output range, in order, and then <tt>max(n - m, 0)</tt>
- *  elements from the second range shall be copied to the output, in order.
- *
- *  This version of \p set_union compares elements using a function object \p comp.
- *
- *  \param first1 The beginning of the first input range.
- *  \param last1 The end of the first input range.
- *  \param first2 The beginning of the second input range.
- *  \param last2 The end of the second input range.
- *  \param result The beginning of the output range.
- *  \param comp Comparison operator.
- *  \return The end of the output range.
- *
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator1's \c value_type is convertible to \p StrictWeakCompare's \c first_argument_type,
- *          and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator2's \c value_type is convertible to \p StrictWeakCompare's \c second_argument_type,
- *          and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam StrictWeakCompare is a model of <a href="http://www.sgi.com/tech/stl/StrictWeakOrdering.html">Strict Weak Ordering</a>.
- *
- *  \pre The ranges <tt>[first1, last1)</tt> and <tt>[first2, last2)</tt> shall be sorted with respect to \p comp.
- *  \pre The resulting range shall not overlap with either input range.
- *
- *  The following code snippet demonstrates how to use \p set_union to compute the union of
- *  two sets of integers sorted in descending order.
- *
- *  \code
- *  #include <thrust/set_operations.h>
- *  #include <thrust/functional.h>
- *  ...
- *  int A1[7] = {12, 10, 8, 6, 4, 2, 0};
- *  int A2[5] = {9, 7, 5, 3, 1};
- *
- *  int result[12];
- *
- *  int *result_end = thrust::set_union(A1, A1 + 7, A2, A2 + 5, result, thrust::greater<int>());
- *  // result = {12, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/set_union.html
- *  \see \p merge
- *  \see \p includes
- *  \see \p set_difference
- *  \see \p set_intersection
- *  \see \p set_symmetric_difference
- *  \see \p sort
- *  \see \p is_sorted
- */
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename StrictWeakCompare>
-  OutputIterator set_union(InputIterator1 first1,
-                           InputIterator1 last1,
-                           InputIterator2 first2,
-                           InputIterator2 last2,
-                           OutputIterator result,
-                           StrictWeakCompare comp);
-
-
-/*! \p set_difference_by_key performs a key-value difference operation from set theory.
- *  \p set_difference_by_key constructs a sorted range that is the difference of the sorted
- *  ranges <tt>[keys_first1, keys_last1)</tt> and <tt>[keys_first2, keys_last2)</tt>. Associated
- *  with each element from the input and output key ranges is a value element. The associated input
- *  value ranges need not be sorted.
- *
- *  In the simplest case, \p set_difference_by_key performs the "difference" operation from set
- *  theory: the keys output range contains a copy of every element that is contained in
- *  <tt>[keys_first1, keys_last1)</tt> and not contained in <tt>[keys_first2, keys_last2)</tt>.
- *  The general case is more complicated, because the input ranges may contain duplicate elements.
- *  The generalization is that if <tt>[keys_first1, keys_last1)</tt> contains \c m elements
- *  that are equivalent to each other and if <tt>[keys_first2, keys_last2)</tt> contains \c n
- *  elements that are equivalent to them, the last <tt>max(m-n,0)</tt> elements from
- *  <tt>[keys_first1, keys_last1)</tt> range shall be copied to the output range.
- *
- *  Each time a key element is copied from <tt>[keys_first1, keys_last1)</tt> or
- *  <tt>[keys_first2, keys_last2)</tt> to the keys output range, the
- *  corresponding value element is copied from the corresponding values input range (beginning at
- *  \p values_first1 or \p values_first2) to the values output range.
- *
- *  This version of \p set_difference_by_key compares key elements using \c operator<.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param keys_first1 The beginning of the first input range of keys.
- *  \param keys_last1 The end of the first input range of keys.
- *  \param keys_first2 The beginning of the second input range of keys.
- *  \param keys_last2 The end of the second input range of keys.
- *  \param values_first1 The beginning of the first input range of values.
- *  \param values_first2 The beginning of the second input range of values.
- *  \param keys_result The beginning of the output range of keys.
- *  \param values_result The beginning of the output range of values.
- *  \return A \p pair \c p such that <tt>p.first</tt> is the end of the output range of keys,
- *          and such that <tt>p.second</tt> is the end of the output range of values.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator1 and \p InputIterator2 have the same \c value_type,
- *          \p InputIterator1's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator2 and \p InputIterator1 have the same \c value_type,
- *          \p InputIterator2's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types.
- *  \tparam InputIterator3 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types.
- *  \tparam InputIterator4 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator4's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types.
- *  \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *
- *  \pre The ranges <tt>[keys_first1, keys_last1)</tt> and <tt>[keys_first2, keys_last2)</tt> shall be sorted with respect to <tt>operator<</tt>.
- *  \pre The resulting ranges shall not overlap with any input range.
- *
- *  The following code snippet demonstrates how to use \p set_difference_by_key to compute the
- *  set difference of two sets of integers sorted in ascending order with their values using the \p thrust::host
- *  execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/set_operations.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  int A_keys[7] = {0, 1, 3, 4, 5, 6, 9};
- *  int A_vals[7] = {0, 0, 0, 0, 0, 0, 0};
- *
- *  int B_keys[5] = {1, 3, 5, 7, 9};
- *  int B_vals[5] = {1, 1, 1, 1, 1};
- *
- *  int keys_result[3];
- *  int vals_result[3];
- *
- *  thrust::pair<int*,int*> end = thrust::set_difference_by_key(thrust::host, A_keys, A_keys + 7, B_keys, B_keys + 5, A_vals, B_vals, keys_result, vals_result);
- *  // keys_result is now {0, 4, 6}
- *  // vals_result is now {0, 0, 0}
- *  \endcode
- *
- *  \see \p set_union_by_key
- *  \see \p set_intersection_by_key
- *  \see \p set_symmetric_difference_by_key
- *  \see \p sort_by_key
- *  \see \p is_sorted
- */
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename InputIterator4,
-         typename OutputIterator1,
-         typename OutputIterator2>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    set_difference_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                          InputIterator1                                              keys_first1,
-                          InputIterator1                                              keys_last1,
-                          InputIterator2                                              keys_first2,
-                          InputIterator2                                              keys_last2,
-                          InputIterator3                                              values_first1,
-                          InputIterator4                                              values_first2,
-                          OutputIterator1                                             keys_result,
-                          OutputIterator2                                             values_result);
-
-
-/*! \p set_difference_by_key performs a key-value difference operation from set theory.
- *  \p set_difference_by_key constructs a sorted range that is the difference of the sorted
- *  ranges <tt>[keys_first1, keys_last1)</tt> and <tt>[keys_first2, keys_last2)</tt>. Associated
- *  with each element from the input and output key ranges is a value element. The associated input
- *  value ranges need not be sorted.
- *
- *  In the simplest case, \p set_difference_by_key performs the "difference" operation from set
- *  theory: the keys output range contains a copy of every element that is contained in
- *  <tt>[keys_first1, keys_last1)</tt> and not contained in <tt>[keys_first2, keys_last2)</tt>.
- *  The general case is more complicated, because the input ranges may contain duplicate elements.
- *  The generalization is that if <tt>[keys_first1, keys_last1)</tt> contains \c m elements
- *  that are equivalent to each other and if <tt>[keys_first2, keys_last2)</tt> contains \c n
- *  elements that are equivalent to them, the last <tt>max(m-n,0)</tt> elements from
- *  <tt>[keys_first1, keys_last1)</tt> range shall be copied to the output range.
- *
- *  Each time a key element is copied from <tt>[keys_first1, keys_last1)</tt> or
- *  <tt>[keys_first2, keys_last2)</tt> to the keys output range, the
- *  corresponding value element is copied from the corresponding values input range (beginning at
- *  \p values_first1 or \p values_first2) to the values output range.
- *
- *  This version of \p set_difference_by_key compares key elements using \c operator<.
- *
- *  \param keys_first1 The beginning of the first input range of keys.
- *  \param keys_last1 The end of the first input range of keys.
- *  \param keys_first2 The beginning of the second input range of keys.
- *  \param keys_last2 The end of the second input range of keys.
- *  \param values_first1 The beginning of the first input range of values.
- *  \param values_first2 The beginning of the second input range of values.
- *  \param keys_result The beginning of the output range of keys.
- *  \param values_result The beginning of the output range of values.
- *  \return A \p pair \c p such that <tt>p.first</tt> is the end of the output range of keys,
- *          and such that <tt>p.second</tt> is the end of the output range of values.
- *
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator1 and \p InputIterator2 have the same \c value_type,
- *          \p InputIterator1's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator2 and \p InputIterator1 have the same \c value_type,
- *          \p InputIterator2's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types.
- *  \tparam InputIterator3 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types.
- *  \tparam InputIterator4 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator4's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types.
- *  \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *
- *  \pre The ranges <tt>[keys_first1, keys_last1)</tt> and <tt>[keys_first2, keys_last2)</tt> shall be sorted with respect to <tt>operator<</tt>.
- *  \pre The resulting ranges shall not overlap with any input range.
- *
- *  The following code snippet demonstrates how to use \p set_difference_by_key to compute the
- *  set difference of two sets of integers sorted in ascending order with their values.
- *
- *  \code
- *  #include <thrust/set_operations.h>
- *  ...
- *  int A_keys[7] = {0, 1, 3, 4, 5, 6, 9};
- *  int A_vals[7] = {0, 0, 0, 0, 0, 0, 0};
- *
- *  int B_keys[5] = {1, 3, 5, 7, 9};
- *  int B_vals[5] = {1, 1, 1, 1, 1};
- *
- *  int keys_result[3];
- *  int vals_result[3];
- *
- *  thrust::pair<int*,int*> end = thrust::set_difference_by_key(A_keys, A_keys + 7, B_keys, B_keys + 5, A_vals, B_vals, keys_result, vals_result);
- *  // keys_result is now {0, 4, 6}
- *  // vals_result is now {0, 0, 0}
- *  \endcode
- *
- *  \see \p set_union_by_key
- *  \see \p set_intersection_by_key
- *  \see \p set_symmetric_difference_by_key
- *  \see \p sort_by_key
- *  \see \p is_sorted
- */
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename InputIterator4,
-         typename OutputIterator1,
-         typename OutputIterator2>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    set_difference_by_key(InputIterator1                             keys_first1,
-                          InputIterator1                             keys_last1,
-                          InputIterator2                             keys_first2,
-                          InputIterator2                             keys_last2,
-                          InputIterator3                             values_first1,
-                          InputIterator4                             values_first2,
-                          OutputIterator1                            keys_result,
-                          OutputIterator2                            values_result);
-
-
-/*! \p set_difference_by_key performs a key-value difference operation from set theory.
- *  \p set_difference_by_key constructs a sorted range that is the difference of the sorted
- *  ranges <tt>[keys_first1, keys_last1)</tt> and <tt>[keys_first2, keys_last2)</tt>. Associated
- *  with each element from the input and output key ranges is a value element. The associated input
- *  value ranges need not be sorted.
- *
- *  In the simplest case, \p set_difference_by_key performs the "difference" operation from set
- *  theory: the keys output range contains a copy of every element that is contained in
- *  <tt>[keys_first1, keys_last1)</tt> and not contained in <tt>[keys_first2, keys_last2)</tt>.
- *  The general case is more complicated, because the input ranges may contain duplicate elements.
- *  The generalization is that if <tt>[keys_first1, keys_last1)</tt> contains \c m elements
- *  that are equivalent to each other and if <tt>[keys_first2, keys_last2)</tt> contains \c n
- *  elements that are equivalent to them, the last <tt>max(m-n,0)</tt> elements from
- *  <tt>[keys_first1, keys_last1)</tt> range shall be copied to the output range.
- *
- *  Each time a key element from <tt>[keys_first1, keys_last1)</tt> or
- *  <tt>[keys_first2, keys_last2)</tt> is copied to the keys output range, the
- *  corresponding value element is copied from the corresponding values input range (beginning at
- *  \p values_first1 or \p values_first2) to the values output range.
- *
- *  This version of \p set_difference_by_key compares key elements using a function object \p comp.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param keys_first1 The beginning of the first input range of keys.
- *  \param keys_last1 The end of the first input range of keys.
- *  \param keys_first2 The beginning of the second input range of keys.
- *  \param keys_last2 The end of the second input range of keys.
- *  \param values_first1 The beginning of the first input range of values.
- *  \param values_first2 The beginning of the second input range of values.
- *  \param keys_result The beginning of the output range of keys.
- *  \param values_result The beginning of the output range of values.
- *  \param comp Comparison operator.
- *  \return A \p pair \c p such that <tt>p.first</tt> is the end of the output range of keys,
- *          and such that <tt>p.second</tt> is the end of the output range of values.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator1 and \p InputIterator2 have the same \c value_type,
- *          \p InputIterator1's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator2 and \p InputIterator1 have the same \c value_type,
- *          \p InputIterator2's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types.
- *  \tparam InputIterator3 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types.
- *  \tparam InputIterator4 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator4's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types.
- *  \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam StrictWeakCompare is a model of <a href="http://www.sgi.com/tech/stl/StrictWeakOrdering.html">Strict Weak Ordering</a>.
- *
- *  \pre The ranges <tt>[keys_first1, keys_last1)</tt> and <tt>[keys_first2, keys_last2)</tt> shall be sorted with respect to \p comp.
- *  \pre The resulting ranges shall not overlap with any input range.
- *
- *  The following code snippet demonstrates how to use \p set_difference_by_key to compute the
- *  set difference of two sets of integers sorted in descending order with their values using the \p thrust::host
- *  execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/set_operations.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  int A_keys[7] = {9, 6, 5, 4, 3, 1, 0};
- *  int A_vals[7] = {0, 0, 0, 0, 0, 0, 0};
- *
- *  int B_keys[5] = {9, 7, 5, 3, 1};
- *  int B_vals[5] = {1, 1, 1, 1, 1};
- *
- *  int keys_result[3];
- *  int vals_result[3];
- *
- *  thrust::pair<int*,int*> end = thrust::set_difference_by_key(thrust::host, A_keys, A_keys + 7, B_keys, B_keys + 5, A_vals, B_vals, keys_result, vals_result, thrust::greater<int>());
- *  // keys_result is now {6, 4, 0} (descending, matching the input ordering)
- *  // vals_result is now {0, 0, 0}
- *  \endcode
- *
- *  \see \p set_union_by_key
- *  \see \p set_intersection_by_key
- *  \see \p set_symmetric_difference_by_key
- *  \see \p sort_by_key
- *  \see \p is_sorted
- */
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename InputIterator4,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename StrictWeakCompare>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    set_difference_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                          InputIterator1                                              keys_first1,
-                          InputIterator1                                              keys_last1,
-                          InputIterator2                                              keys_first2,
-                          InputIterator2                                              keys_last2,
-                          InputIterator3                                              values_first1,
-                          InputIterator4                                              values_first2,
-                          OutputIterator1                                             keys_result,
-                          OutputIterator2                                             values_result,
-                          StrictWeakCompare                                           comp);
-
-
-/*! \p set_difference_by_key performs a key-value difference operation from set theory.
- *  \p set_difference_by_key constructs a sorted range that is the difference of the sorted
- *  ranges <tt>[keys_first1, keys_last1)</tt> and <tt>[keys_first2, keys_last2)</tt>. Associated
- *  with each element from the input and output key ranges is a value element. The associated input
- *  value ranges need not be sorted.
- *
- *  In the simplest case, \p set_difference_by_key performs the "difference" operation from set
- *  theory: the keys output range contains a copy of every element that is contained in
- *  <tt>[keys_first1, keys_last1)</tt> and not contained in <tt>[keys_first2, keys_last2)</tt>.
- *  The general case is more complicated, because the input ranges may contain duplicate elements.
- *  The generalization is that if <tt>[keys_first1, keys_last1)</tt> contains \c m elements
- *  that are equivalent to each other and if <tt>[keys_first2, keys_last2)</tt> contains \c n
- *  elements that are equivalent to them, the last <tt>max(m-n,0)</tt> elements from
- *  <tt>[keys_first1, keys_last1)</tt> range shall be copied to the output range.
- *
- *  Each time a key element from <tt>[keys_first1, keys_last1)</tt> or
- *  <tt>[keys_first2, keys_last2)</tt> is copied to the keys output range, the
- *  corresponding value element is copied from the corresponding values input range (beginning at
- *  \p values_first1 or \p values_first2) to the values output range.
- *
- *  This version of \p set_difference_by_key compares key elements using a function object \p comp.
- *
- *  \param keys_first1 The beginning of the first input range of keys.
- *  \param keys_last1 The end of the first input range of keys.
- *  \param keys_first2 The beginning of the second input range of keys.
- *  \param keys_last2 The end of the second input range of keys.
- *  \param values_first1 The beginning of the first input range of values.
- *  \param values_first2 The beginning of the second input range of values.
- *  \param keys_result The beginning of the output range of keys.
- *  \param values_result The beginning of the output range of values.
- *  \param comp Comparison operator.
- *  \return A \p pair \c p such that <tt>p.first</tt> is the end of the output range of keys,
- *          and such that <tt>p.second</tt> is the end of the output range of values.
- *
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator1 and \p InputIterator2 have the same \c value_type,
- *          \p InputIterator1's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator2 and \p InputIterator1 have the same \c value_type,
- *          \p InputIterator2's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types.
- *  \tparam InputIterator3 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types.
- *  \tparam InputIterator4 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator4's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types.
- *  \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam StrictWeakCompare is a model of <a href="http://www.sgi.com/tech/stl/StrictWeakOrdering.html">Strict Weak Ordering</a>.
- *
- *  \pre The ranges <tt>[keys_first1, keys_last1)</tt> and <tt>[keys_first2, keys_last2)</tt> shall be sorted with respect to \p comp.
- *  \pre The resulting ranges shall not overlap with any input range.
- *
- *  The following code snippet demonstrates how to use \p set_difference_by_key to compute the
- *  set difference of two sets of integers sorted in descending order with their values.
- *
- *  \code
- *  #include <thrust/set_operations.h>
- *  #include <thrust/functional.h>
- *  ...
- *  int A_keys[7] = {9, 6, 5, 4, 3, 1, 0};
- *  int A_vals[7] = {0, 0, 0, 0, 0, 0, 0};
- *
- *  int B_keys[5] = {9, 7, 5, 3, 1};
- *  int B_vals[5] = {1, 1, 1, 1, 1};
- *
- *  int keys_result[3];
- *  int vals_result[3];
- *
- *  thrust::pair<int*,int*> end = thrust::set_difference_by_key(A_keys, A_keys + 7, B_keys, B_keys + 5, A_vals, B_vals, keys_result, vals_result, thrust::greater<int>());
- *  // keys_result is now {6, 4, 0} (descending, matching the input ordering)
- *  // vals_result is now {0, 0, 0}
- *  \endcode
- *
- *  \see \p set_union_by_key
- *  \see \p set_intersection_by_key
- *  \see \p set_symmetric_difference_by_key
- *  \see \p sort_by_key
- *  \see \p is_sorted
- */
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename InputIterator4,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename StrictWeakCompare>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    set_difference_by_key(InputIterator1                             keys_first1,
-                          InputIterator1                             keys_last1,
-                          InputIterator2                             keys_first2,
-                          InputIterator2                             keys_last2,
-                          InputIterator3                             values_first1,
-                          InputIterator4                             values_first2,
-                          OutputIterator1                            keys_result,
-                          OutputIterator2                            values_result,
-                          StrictWeakCompare                          comp);
-
-
-/*! \p set_intersection_by_key performs a key-value intersection operation from set theory.
- *  \p set_intersection_by_key constructs a sorted range that is the intersection of the sorted
- *  ranges <tt>[keys_first1, keys_last1)</tt> and <tt>[keys_first2, keys_last2)</tt>. Associated
- *  with each element from the input and output key ranges is a value element. The associated input
- *  value ranges need not be sorted.
- *
- *  In the simplest case, \p set_intersection_by_key performs the "intersection" operation from set
- *  theory: the keys output range contains a copy of every element that is contained in both
- *  <tt>[keys_first1, keys_last1)</tt> and <tt>[keys_first2, keys_last2)</tt>.
- *  The general case is more complicated, because the input ranges may contain duplicate elements.
- *  The generalization is that if an element appears \c m times in <tt>[keys_first1, keys_last1)</tt>
- *  and \c n times in <tt>[keys_first2, keys_last2)</tt> (where \c m may be zero), then it
- *  appears <tt>min(m,n)</tt> times in the keys output range.
- *  \p set_intersection_by_key is stable, meaning both that elements are copied from the first
- *  input range rather than the second, and that the relative order of elements in the output range
- *  is the same as the first input range.
- *
- *  Each time a key element is copied from <tt>[keys_first1, keys_last1)</tt> to the keys output range,
- *  the corresponding value element is copied from the values input range beginning at \p values_first1
- *  to the values output range.
- *
- *  This version of \p set_intersection_by_key compares objects using \c operator<.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param keys_first1 The beginning of the first input range of keys.
- *  \param keys_last1 The end of the first input range of keys.
- *  \param keys_first2 The beginning of the second input range of keys.
- *  \param keys_last2 The end of the second input range of keys.
- *  \param values_first1 The beginning of the first input range of values.
- *  \param keys_result The beginning of the output range of keys.
- *  \param values_result The beginning of the output range of values.
- *  \return A \p pair \c p such that <tt>p.first</tt> is the end of the output range of keys,
- *          and such that <tt>p.second</tt> is the end of the output range of values.
- *
- *  \note Unlike the other key-value set operations, \p set_intersection_by_key is unique in that it has no
- *        \c values_first2 parameter because elements from the second input range are never copied to the output range.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator1 and \p InputIterator2 have the same \c value_type,
- *          \p InputIterator1's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator2 and \p InputIterator1 have the same \c value_type,
- *          \p InputIterator2's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types.
- *  \tparam InputIterator3 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types.
- *  \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *
- *  \pre The ranges <tt>[keys_first1, keys_last1)</tt> and <tt>[keys_first2, keys_last2)</tt> shall be sorted with respect to <tt>operator<</tt>.
- *  \pre The resulting ranges shall not overlap with any input range.
- *
- *  The following code snippet demonstrates how to use \p set_intersection_by_key to compute the
- *  set intersection of two sets of integers sorted in ascending order with their values using the \p thrust::host
- *  execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/set_operations.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  int A_keys[6] = {1, 3, 5, 7, 9, 11};
- *  int A_vals[6] = {0, 0, 0, 0, 0,  0};
- *
- *  int B_keys[7] = {1, 1, 2, 3, 5,  8, 13};
- *
- *  int keys_result[7];
- *  int vals_result[7];
- *
- *  thrust::pair<int*,int*> end = thrust::set_intersection_by_key(thrust::host, A_keys, A_keys + 6, B_keys, B_keys + 7, A_vals, keys_result, vals_result);
- *
- *  // keys_result is now {1, 3, 5}
- *  // vals_result is now {0, 0, 0}
- *  \endcode
- *
- *  \see \p set_union_by_key
- *  \see \p set_difference_by_key
- *  \see \p set_symmetric_difference_by_key
- *  \see \p sort_by_key
- *  \see \p is_sorted
- */
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename OutputIterator1,
-         typename OutputIterator2>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    set_intersection_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                            InputIterator1                                              keys_first1,
-                            InputIterator1                                              keys_last1,
-                            InputIterator2                                              keys_first2,
-                            InputIterator2                                              keys_last2,
-                            InputIterator3                                              values_first1,
-                            OutputIterator1                                             keys_result,
-                            OutputIterator2                                             values_result);
-
-
-/*! \p set_intersection_by_key performs a key-value intersection operation from set theory.
- *  \p set_intersection_by_key constructs a sorted range that is the intersection of the sorted
- *  ranges <tt>[keys_first1, keys_last1)</tt> and <tt>[keys_first2, keys_last2)</tt>. Associated
- *  with each element from the input and output key ranges is a value element. The associated input
- *  value ranges need not be sorted.
- *
- *  In the simplest case, \p set_intersection_by_key performs the "intersection" operation from set
- *  theory: the keys output range contains a copy of every element that is contained in both
- *  <tt>[keys_first1, keys_last1)</tt> and <tt>[keys_first2, keys_last2)</tt>.
- *  The general case is more complicated, because the input ranges may contain duplicate elements.
- *  The generalization is that if an element appears \c m times in <tt>[keys_first1, keys_last1)</tt>
- *  and \c n times in <tt>[keys_first2, keys_last2)</tt> (where \c m may be zero), then it
- *  appears <tt>min(m,n)</tt> times in the keys output range.
- *  \p set_intersection_by_key is stable, meaning both that elements are copied from the first
- *  input range rather than the second, and that the relative order of elements in the output range
- *  is the same as the first input range.
- *
- *  Each time a key element is copied from <tt>[keys_first1, keys_last1)</tt> to the keys output range,
- *  the corresponding value element is copied from the values input range beginning at \p values_first1
- *  to the values output range.
- *
- *  This version of \p set_intersection_by_key compares objects using \c operator<.
- *
- *  \param keys_first1 The beginning of the first input range of keys.
- *  \param keys_last1 The end of the first input range of keys.
- *  \param keys_first2 The beginning of the second input range of keys.
- *  \param keys_last2 The end of the second input range of keys.
- *  \param values_first1 The beginning of the first input range of values.
- *  \param keys_result The beginning of the output range of keys.
- *  \param values_result The beginning of the output range of values.
- *  \return A \p pair \c p such that <tt>p.first</tt> is the end of the output range of keys,
- *          and such that <tt>p.second</tt> is the end of the output range of values.
- *
- *  \note Unlike the other key-value set operations, \p set_intersection_by_key is unique in that it has no
- *        \c values_first2 parameter because elements from the second input range are never copied to the output range.
- *
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator1 and \p InputIterator2 have the same \c value_type,
- *          \p InputIterator1's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator2 and \p InputIterator1 have the same \c value_type,
- *          \p InputIterator2's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types.
- *  \tparam InputIterator3 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types.
- *  \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *
- *  \pre The ranges <tt>[keys_first1, keys_last1)</tt> and <tt>[keys_first2, keys_last2)</tt> shall be sorted with respect to <tt>operator<</tt>.
- *  \pre The resulting ranges shall not overlap with any input range.
- *
- *  The following code snippet demonstrates how to use \p set_intersection_by_key to compute the
- *  set intersection of two sets of integers sorted in ascending order with their values.
- *
- *  \code
- *  #include <thrust/set_operations.h>
- *  ...
- *  int A_keys[6] = {1, 3, 5, 7, 9, 11};
- *  int A_vals[6] = {0, 0, 0, 0, 0,  0};
- *
- *  int B_keys[7] = {1, 1, 2, 3, 5,  8, 13};
- *
- *  int keys_result[7];
- *  int vals_result[7];
- *
- *  thrust::pair<int*,int*> end = thrust::set_intersection_by_key(A_keys, A_keys + 6, B_keys, B_keys + 7, A_vals, keys_result, vals_result);
- *
- *  // keys_result is now {1, 3, 5}
- *  // vals_result is now {0, 0, 0}
- *  \endcode
- *
- *  \see \p set_union_by_key
- *  \see \p set_difference_by_key
- *  \see \p set_symmetric_difference_by_key
- *  \see \p sort_by_key
- *  \see \p is_sorted
- */
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename OutputIterator1,
-         typename OutputIterator2>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    set_intersection_by_key(InputIterator1                             keys_first1,
-                            InputIterator1                             keys_last1,
-                            InputIterator2                             keys_first2,
-                            InputIterator2                             keys_last2,
-                            InputIterator3                             values_first1,
-                            OutputIterator1                            keys_result,
-                            OutputIterator2                            values_result);
-
-
-/*! \p set_intersection_by_key performs a key-value intersection operation from set theory.
- *  \p set_intersection_by_key constructs a sorted range that is the intersection of the sorted
- *  ranges <tt>[keys_first1, keys_last1)</tt> and <tt>[keys_first2, keys_last2)</tt>. Associated
- *  with each element from the input and output key ranges is a value element. The associated input
- *  value ranges need not be sorted.
- *
- *  In the simplest case, \p set_intersection_by_key performs the "intersection" operation from set
- *  theory: the keys output range contains a copy of every element that is contained in both
- *  <tt>[keys_first1, keys_last1)</tt> and <tt>[keys_first2, keys_last2)</tt>.
- *  The general case is more complicated, because the input ranges may contain duplicate elements.
- *  The generalization is that if an element appears \c m times in <tt>[keys_first1, keys_last1)</tt>
- *  and \c n times in <tt>[keys_first2, keys_last2)</tt> (where \c m may be zero), then it
- *  appears <tt>min(m,n)</tt> times in the keys output range.
- *  \p set_intersection_by_key is stable, meaning both that elements are copied from the first
- *  input range rather than the second, and that the relative order of elements in the output range
- *  is the same as the first input range.
- *
- *  Each time a key element is copied from <tt>[keys_first1, keys_last1)</tt> to the keys output range,
- *  the corresponding value element is copied from the values input range beginning at \p values_first1 to the values
- *  output range.
- *
- *  This version of \p set_intersection_by_key compares objects using a function object \p comp.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param keys_first1 The beginning of the first input range of keys.
- *  \param keys_last1 The end of the first input range of keys.
- *  \param keys_first2 The beginning of the second input range of keys.
- *  \param keys_last2 The end of the second input range of keys.
- *  \param values_first1 The beginning of the first input range of values.
- *  \param keys_result The beginning of the output range of keys.
- *  \param values_result The beginning of the output range of values.
- *  \param comp Comparison operator.
- *  \return A \p pair \c p such that <tt>p.first</tt> is the end of the output range of keys,
- *          and such that <tt>p.second</tt> is the end of the output range of values.
- *
- *  \note Unlike the other key-value set operations, \p set_intersection_by_key is unique in that it has no
- *        \c values_first2 parameter because elements from the second input range are never copied to the output range.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator1 and \p InputIterator2 have the same \c value_type,
- *          \p InputIterator1's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator2 and \p InputIterator1 have the same \c value_type,
- *          \p InputIterator2's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types.
- *  \tparam InputIterator3 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types.
- *  \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam StrictWeakCompare is a model of <a href="http://www.sgi.com/tech/stl/StrictWeakOrdering.html">Strict Weak Ordering</a>.
- *
- *  \pre The ranges <tt>[keys_first1, keys_last1)</tt> and <tt>[keys_first2, keys_last2)</tt> shall be sorted with respect to \p comp.
- *  \pre The resulting ranges shall not overlap with any input range.
- *
- *  The following code snippet demonstrates how to use \p set_intersection_by_key to compute the
- *  set intersection of two sets of integers sorted in descending order with their values using the
- *  \p thrust::host execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/set_operations.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  int A_keys[6] = {11, 9, 7, 5, 3, 1};
- *  int A_vals[6] = { 0, 0, 0, 0, 0, 0};
- *  
- *  int B_keys[7] = {13, 8, 5, 3, 2, 1, 1};
- *
- *  int keys_result[7];
- *  int vals_result[7];
- *
- *  thrust::pair<int*,int*> end = thrust::set_intersection_by_key(thrust::host, A_keys, A_keys + 6, B_keys, B_keys + 7, A_vals, keys_result, vals_result, thrust::greater<int>());
- *
- *  // keys_result is now {5, 3, 1}
- *  // vals_result is now {0, 0, 0}
- *  \endcode
- *
- *  \see \p set_union_by_key
- *  \see \p set_difference_by_key
- *  \see \p set_symmetric_difference_by_key
- *  \see \p sort_by_key
- *  \see \p is_sorted
- */
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename StrictWeakCompare>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    set_intersection_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                            InputIterator1                                              keys_first1,
-                            InputIterator1                                              keys_last1,
-                            InputIterator2                                              keys_first2,
-                            InputIterator2                                              keys_last2,
-                            InputIterator3                                              values_first1,
-                            OutputIterator1                                             keys_result,
-                            OutputIterator2                                             values_result,
-                            StrictWeakCompare                                           comp);
-
-
-/*! \p set_intersection_by_key performs a key-value intersection operation from set theory.
- *  \p set_intersection_by_key constructs a sorted range that is the intersection of the sorted
- *  ranges <tt>[keys_first1, keys_last1)</tt> and <tt>[keys_first2, keys_last2)</tt>. Associated
- *  with each element from the input and output key ranges is a value element. The associated input
- *  value ranges need not be sorted.
- *
- *  In the simplest case, \p set_intersection_by_key performs the "intersection" operation from set
- *  theory: the keys output range contains a copy of every element that is contained in both
- *  <tt>[keys_first1, keys_last1)</tt> and <tt>[keys_first2, keys_last2)</tt>.
- *  The general case is more complicated, because the input ranges may contain duplicate elements.
- *  The generalization is that if an element appears \c m times in <tt>[keys_first1, keys_last1)</tt>
- *  and \c n times in <tt>[keys_first2, keys_last2)</tt> (where \c m may be zero), then it
- *  appears <tt>min(m,n)</tt> times in the keys output range.
- *  \p set_intersection_by_key is stable, meaning both that elements are copied from the first
- *  input range rather than the second, and that the relative order of elements in the output range
- *  is the same as the first input range.
- *
- *  Each time a key element is copied from <tt>[keys_first1, keys_last1)</tt> to the keys output range,
- *  the corresponding value element is copied from the values input range beginning at \p values_first1
- *  to the values output range.
- *
- *  This version of \p set_intersection_by_key compares objects using a function object \p comp.
- *
- *  \param keys_first1 The beginning of the first input range of keys.
- *  \param keys_last1 The end of the first input range of keys.
- *  \param keys_first2 The beginning of the second input range of keys.
- *  \param keys_last2 The end of the second input range of keys.
- *  \param values_first1 The beginning of the first input range of values.
- *  \param keys_result The beginning of the output range of keys.
- *  \param values_result The beginning of the output range of values.
- *  \param comp Comparison operator.
- *  \return A \p pair \c p such that <tt>p.first</tt> is the end of the output range of keys,
- *          and such that <tt>p.second</tt> is the end of the output range of values.
- *
- *  \note Unlike the other key-value set operations, \p set_intersection_by_key is unique in that it has no
- *        \c values_first2 parameter because elements from the second input range are never copied to the output range.
- *
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator1 and \p InputIterator2 have the same \c value_type,
- *          \p InputIterator1's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator2 and \p InputIterator1 have the same \c value_type,
- *          \p InputIterator2's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types.
- *  \tparam InputIterator3 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types.
- *  \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam StrictWeakCompare is a model of <a href="http://www.sgi.com/tech/stl/StrictWeakOrdering.html">Strict Weak Ordering</a>.
- *
- *  \pre The ranges <tt>[keys_first1, keys_last1)</tt> and <tt>[keys_first2, keys_last2)</tt> shall be sorted with respect to \p comp.
- *  \pre The resulting ranges shall not overlap with any input range.
- *
- *  The following code snippet demonstrates how to use \p set_intersection_by_key to compute the
- *  set intersection of two sets of integers sorted in descending order with their values.
- *
- *  \code
- *  #include <thrust/set_operations.h>
- *  #include <thrust/functional.h>
- *  ...
- *  int A_keys[6] = {11, 9, 7, 5, 3, 1};
- *  int A_vals[6] = { 0, 0, 0, 0, 0, 0};
- *
- *  int B_keys[7] = {13, 8, 5, 3, 2, 1, 1};
- *
- *  int keys_result[7];
- *  int vals_result[7];
- *
- *  thrust::pair<int*,int*> end = thrust::set_intersection_by_key(A_keys, A_keys + 6, B_keys, B_keys + 7, A_vals, keys_result, vals_result, thrust::greater<int>());
- *
- *  // keys_result is now {5, 3, 1}
- *  // vals_result is now {0, 0, 0}
- *  \endcode
- *
- *  \see \p set_union_by_key
- *  \see \p set_difference_by_key
- *  \see \p set_symmetric_difference_by_key
- *  \see \p sort_by_key
- *  \see \p is_sorted
- */
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename StrictWeakCompare>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    set_intersection_by_key(InputIterator1                             keys_first1,
-                            InputIterator1                             keys_last1,
-                            InputIterator2                             keys_first2,
-                            InputIterator2                             keys_last2,
-                            InputIterator3                             values_first1,
-                            OutputIterator1                            keys_result,
-                            OutputIterator2                            values_result,
-                            StrictWeakCompare                          comp);
-
-
-/*! \p set_symmetric_difference_by_key performs a key-value symmetric difference operation from set theory.
- *  \p set_symmetric_difference_by_key constructs a sorted range that is the symmetric difference of the sorted
- *  ranges <tt>[keys_first1, keys_last1)</tt> and <tt>[keys_first2, keys_last2)</tt>. Associated
- *  with each element from the input and output key ranges is a value element. The associated input
- *  value ranges need not be sorted.
- *
- *  In the simplest case, \p set_symmetric_difference_by_key performs a set theoretic calculation:
- *  it constructs the union of the two sets A - B and B - A, where A and B are the two
- *  input ranges. That is, the output range contains a copy of every element that is
- *  contained in <tt>[keys_first1, keys_last1)</tt> but not <tt>[keys_first2, keys_last2)</tt>, and a copy of
- *  every element that is contained in <tt>[keys_first2, keys_last2)</tt> but not <tt>[keys_first1, keys_last1)</tt>.
- *  The general case is more complicated, because the input ranges may contain duplicate elements.
- *  The generalization is that if <tt>[keys_first1, keys_last1)</tt> contains \c m elements that are
- *  equivalent to each other and <tt>[keys_first2, keys_last2)</tt> contains \c n elements that are
- *  equivalent to them, then <tt>|m - n|</tt> of those elements shall be copied to the output
- *  range: the last <tt>m - n</tt> elements from <tt>[keys_first1, keys_last1)</tt> if <tt>m > n</tt>, and
- *  the last <tt>n - m</tt> of these elements from <tt>[keys_first2, keys_last2)</tt> if <tt>m < n</tt>.
- *
- *  Each time a key element is copied from <tt>[keys_first1, keys_last1)</tt> or
- *  <tt>[keys_first2, keys_last2)</tt> to the keys output range, the
- *  corresponding value element is copied from the corresponding values input range (beginning at
- *  \p values_first1 or \p values_first2) to the values output range.
- *
- *  This version of \p set_symmetric_difference_by_key compares key elements using \c operator<.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param keys_first1 The beginning of the first input range of keys.
- *  \param keys_last1 The end of the first input range of keys.
- *  \param keys_first2 The beginning of the second input range of keys.
- *  \param keys_last2 The end of the second input range of keys.
- *  \param values_first1 The beginning of the first input range of values.
- *  \param values_first2 The beginning of the second input range of values.
- *  \param keys_result The beginning of the output range of keys.
- *  \param values_result The beginning of the output range of values.
- *  \return A \p pair \c p such that <tt>p.first</tt> is the end of the output range of keys,
- *          and such that <tt>p.second</tt> is the end of the output range of values.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator1 and \p InputIterator2 have the same \c value_type,
- *          \p InputIterator1's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator2 and \p InputIterator1 have the same \c value_type,
- *          \p InputIterator2's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types.
- *  \tparam InputIterator3 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types.
- *  \tparam InputIterator4 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator4's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types.
- *  \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *
- *  \pre The ranges <tt>[keys_first1, keys_last1)</tt> and <tt>[keys_first2, keys_last2)</tt> shall be sorted with respect to <tt>operator<</tt>.
- *  \pre The resulting ranges shall not overlap with any input range.
- *
- *  The following code snippet demonstrates how to use \p set_symmetric_difference_by_key to compute the
- *  symmetric difference of two sets of integers sorted in ascending order with their values using the
- *  \p thrust::host execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/set_operations.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  int A_keys[7] = {0, 1, 2, 2, 4, 6, 7};
- *  int A_vals[7] = {0, 0, 0, 0, 0, 0, 0};
- *
- *  int B_keys[5] = {1, 1, 2, 5, 8};
- *  int B_vals[5] = {1, 1, 1, 1, 1};
- *
- *  int keys_result[8];
- *  int vals_result[8];
- *
- *  thrust::pair<int*,int*> end = thrust::set_symmetric_difference_by_key(thrust::host, A_keys, A_keys + 7, B_keys, B_keys + 5, A_vals, B_vals, keys_result, vals_result);
- *  // keys_result is now {0, 1, 2, 4, 5, 6, 7, 8}
- *  // vals_result is now {0, 1, 0, 0, 1, 0, 0, 1}
- *  \endcode
- *
- *  \see \p set_union_by_key
- *  \see \p set_intersection_by_key
- *  \see \p set_difference_by_key
- *  \see \p sort_by_key
- *  \see \p is_sorted
- */
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename InputIterator4,
-         typename OutputIterator1,
-         typename OutputIterator2>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    set_symmetric_difference_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                    InputIterator1                                              keys_first1,
-                                    InputIterator1                                              keys_last1,
-                                    InputIterator2                                              keys_first2,
-                                    InputIterator2                                              keys_last2,
-                                    InputIterator3                                              values_first1,
-                                    InputIterator4                                              values_first2,
-                                    OutputIterator1                                             keys_result,
-                                    OutputIterator2                                             values_result);
-
-
-/*! \p set_symmetric_difference_by_key performs a key-value symmetric difference operation from set theory.
- *  \p set_symmetric_difference_by_key constructs a sorted range that is the symmetric difference of the sorted
- *  ranges <tt>[keys_first1, keys_last1)</tt> and <tt>[keys_first2, keys_last2)</tt>. Associated
- *  with each element from the input and output key ranges is a value element. The associated input
- *  value ranges need not be sorted.
- *
- *  In the simplest case, \p set_symmetric_difference_by_key performs a set theoretic calculation:
- *  it constructs the union of the two sets A - B and B - A, where A and B are the two
- *  input ranges. That is, the output range contains a copy of every element that is
- *  contained in <tt>[keys_first1, keys_last1)</tt> but not <tt>[keys_first2, keys_last2)</tt>, and a copy of
- *  every element that is contained in <tt>[keys_first2, keys_last2)</tt> but not <tt>[keys_first1, keys_last1)</tt>.
- *  The general case is more complicated, because the input ranges may contain duplicate elements.
- *  The generalization is that if <tt>[keys_first1, keys_last1)</tt> contains \c m elements that are
- *  equivalent to each other and <tt>[keys_first2, keys_last2)</tt> contains \c n elements that are
- *  equivalent to them, then <tt>|m - n|</tt> of those elements shall be copied to the output
- *  range: the last <tt>m - n</tt> elements from <tt>[keys_first1, keys_last1)</tt> if <tt>m > n</tt>, and
- *  the last <tt>n - m</tt> of these elements from <tt>[keys_first2, keys_last2)</tt> if <tt>m < n</tt>.
- *
- *  Each time a key element is copied from <tt>[keys_first1, keys_last1)</tt> or
- *  <tt>[keys_first2, keys_last2)</tt> to the keys output range, the
- *  corresponding value element is copied from the corresponding values input range (beginning at
- *  \p values_first1 or \p values_first2) to the values output range.
- *
- *  This version of \p set_symmetric_difference_by_key compares key elements using \c operator<.
- *
- *  \param keys_first1 The beginning of the first input range of keys.
- *  \param keys_last1 The end of the first input range of keys.
- *  \param keys_first2 The beginning of the second input range of keys.
- *  \param keys_last2 The end of the second input range of keys.
- *  \param values_first1 The beginning of the first input range of values.
- *  \param values_first2 The beginning of the second input range of values.
- *  \param keys_result The beginning of the output range of keys.
- *  \param values_result The beginning of the output range of values.
- *  \return A \p pair \c p such that <tt>p.first</tt> is the end of the output range of keys,
- *          and such that <tt>p.second</tt> is the end of the output range of values.
- *
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator1 and \p InputIterator2 have the same \c value_type,
- *          \p InputIterator1's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator2 and \p InputIterator1 have the same \c value_type,
- *          \p InputIterator2's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types.
- *  \tparam InputIterator3 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types.
- *  \tparam InputIterator4 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator4's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types.
- *  \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *
- *  \pre The ranges <tt>[keys_first1, keys_last1)</tt> and <tt>[keys_first2, keys_last2)</tt> shall be sorted with respect to <tt>operator<</tt>.
- *  \pre The resulting ranges shall not overlap with any input range.
- *
- *  The following code snippet demonstrates how to use \p set_symmetric_difference_by_key to compute the
- *  symmetric difference of two sets of integers sorted in ascending order with their values.
- *
- *  \code
- *  #include <thrust/set_operations.h>
- *  ...
- *  int A_keys[7] = {0, 1, 2, 2, 4, 6, 7};
- *  int A_vals[7] = {0, 0, 0, 0, 0, 0, 0};
- *
- *  int B_keys[5] = {1, 1, 2, 5, 8};
- *  int B_vals[5] = {1, 1, 1, 1, 1};
- *
- *  int keys_result[8];
- *  int vals_result[8];
- *
- *  thrust::pair<int*,int*> end = thrust::set_symmetric_difference_by_key(A_keys, A_keys + 7, B_keys, B_keys + 5, A_vals, B_vals, keys_result, vals_result);
- *  // keys_result is now {0, 1, 2, 4, 5, 6, 7, 8}
- *  // vals_result is now {0, 1, 0, 0, 1, 0, 0, 1}
- *  \endcode
- *
- *  \see \p set_union_by_key
- *  \see \p set_intersection_by_key
- *  \see \p set_difference_by_key
- *  \see \p sort_by_key
- *  \see \p is_sorted
- */
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename InputIterator4,
-         typename OutputIterator1,
-         typename OutputIterator2>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    set_symmetric_difference_by_key(InputIterator1                             keys_first1,
-                                    InputIterator1                             keys_last1,
-                                    InputIterator2                             keys_first2,
-                                    InputIterator2                             keys_last2,
-                                    InputIterator3                             values_first1,
-                                    InputIterator4                             values_first2,
-                                    OutputIterator1                            keys_result,
-                                    OutputIterator2                            values_result);
-
-
-/*! \p set_symmetric_difference_by_key performs a key-value symmetric difference operation from set theory.
- *  \p set_symmetric_difference_by_key constructs a sorted range that is the symmetric difference of the sorted
- *  ranges <tt>[keys_first1, keys_last1)</tt> and <tt>[keys_first2, keys_last2)</tt>. Associated
- *  with each element from the input and output key ranges is a value element. The associated input
- *  value ranges need not be sorted.
- *
- *  In the simplest case, \p set_symmetric_difference_by_key performs a set theoretic calculation:
- *  it constructs the union of the two sets A - B and B - A, where A and B are the two
- *  input ranges. That is, the output range contains a copy of every element that is
- *  contained in <tt>[keys_first1, keys_last1)</tt> but not <tt>[keys_first2, keys_last2)</tt>, and a copy of
- *  every element that is contained in <tt>[keys_first2, keys_last2)</tt> but not <tt>[keys_first1, keys_last1)</tt>.
- *  The general case is more complicated, because the input ranges may contain duplicate elements.
- *  The generalization is that if <tt>[keys_first1, keys_last1)</tt> contains \c m elements that are
- *  equivalent to each other and <tt>[keys_first2, keys_last2)</tt> contains \c n elements that are
- *  equivalent to them, then <tt>|m - n|</tt> of those elements shall be copied to the output
- *  range: the last <tt>m - n</tt> elements from <tt>[keys_first1, keys_last1)</tt> if <tt>m > n</tt>, and
- *  the last <tt>n - m</tt> of these elements from <tt>[keys_first2, keys_last2)</tt> if <tt>m < n</tt>.
- *
- *  Each time a key element is copied from <tt>[keys_first1, keys_last1)</tt> or
- *  <tt>[keys_first2, keys_last2)</tt> to the keys output range, the
- *  corresponding value element is copied from the corresponding values input range (beginning at
- *  \p values_first1 or \p values_first2) to the values output range.
- *
- *  This version of \p set_symmetric_difference_by_key compares key elements using a function object \c comp.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param keys_first1 The beginning of the first input range of keys.
- *  \param keys_last1 The end of the first input range of keys.
- *  \param keys_first2 The beginning of the second input range of keys.
- *  \param keys_last2 The end of the second input range of keys.
- *  \param values_first1 The beginning of the first input range of values.
- *  \param values_first2 The beginning of the second input range of values.
- *  \param keys_result The beginning of the output range of keys.
- *  \param values_result The beginning of the output range of values.
- *  \param comp Comparison operator.
- *  \return A \p pair \c p such that <tt>p.first</tt> is the end of the output range of keys,
- *          and such that <tt>p.second</tt> is the end of the output range of values.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator1 and \p InputIterator2 have the same \c value_type,
- *          \p InputIterator1's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator2 and \p InputIterator1 have the same \c value_type,
- *          \p InputIterator2's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types.
- *  \tparam InputIterator3 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types.
- *  \tparam InputIterator4 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator4's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types.
- *  \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam StrictWeakCompare is a model of <a href="http://www.sgi.com/tech/stl/StrictWeakOrdering.html">Strict Weak Ordering</a>.
- *
- *  \pre The ranges <tt>[keys_first1, keys_last1)</tt> and <tt>[keys_first2, keys_last2)</tt> shall be sorted with respect to \p comp.
- *  \pre The resulting ranges shall not overlap with any input range.
- *
- *  The following code snippet demonstrates how to use \p set_symmetric_difference_by_key to compute the
- *  symmetric difference of two sets of integers sorted in descending order with their values using the
- *  \p thrust::host execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/set_operations.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  int A_keys[7] = {7, 6, 4, 2, 2, 1, 0};
- *  int A_vals[7] = {0, 0, 0, 0, 0, 0, 0};
- *
- *  int B_keys[5] = {8, 5, 2, 1, 1};
- *  int B_vals[5] = {1, 1, 1, 1, 1};
- *
- *  int keys_result[8];
- *  int vals_result[8];
- *
- *  thrust::pair<int*,int*> end = thrust::set_symmetric_difference_by_key(thrust::host, A_keys, A_keys + 7, B_keys, B_keys + 5, A_vals, B_vals, keys_result, vals_result, thrust::greater<int>());
- *  // keys_result is now {8, 7, 6, 5, 4, 2, 1, 0}
- *  // vals_result is now {1, 0, 0, 1, 0, 0, 1, 0}
- *  \endcode
- *
- *  \see \p set_union_by_key
- *  \see \p set_intersection_by_key
- *  \see \p set_difference_by_key
- *  \see \p sort_by_key
- *  \see \p is_sorted
- */
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename InputIterator4,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename StrictWeakCompare>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    set_symmetric_difference_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                    InputIterator1                                              keys_first1,
-                                    InputIterator1                                              keys_last1,
-                                    InputIterator2                                              keys_first2,
-                                    InputIterator2                                              keys_last2,
-                                    InputIterator3                                              values_first1,
-                                    InputIterator4                                              values_first2,
-                                    OutputIterator1                                             keys_result,
-                                    OutputIterator2                                             values_result,
-                                    StrictWeakCompare                                           comp);
-
-
-/*! \p set_symmetric_difference_by_key performs a key-value symmetric difference operation from set theory.
- *  \p set_symmetric_difference_by_key constructs a sorted range that is the symmetric difference of the sorted
- *  ranges <tt>[keys_first1, keys_last1)</tt> and <tt>[keys_first2, keys_last2)</tt>. Associated
- *  with each element from the input and output key ranges is a value element. The associated input
- *  value ranges need not be sorted.
- *
- *  In the simplest case, \p set_symmetric_difference_by_key performs a set theoretic calculation:
- *  it constructs the union of the two sets A - B and B - A, where A and B are the two
- *  input ranges. That is, the output range contains a copy of every element that is
- *  contained in <tt>[keys_first1, keys_last1)</tt> but not <tt>[keys_first2, keys_last2)</tt>, and a copy of
- *  every element that is contained in <tt>[keys_first2, keys_last2)</tt> but not <tt>[keys_first1, keys_last1)</tt>.
- *  The general case is more complicated, because the input ranges may contain duplicate elements.
- *  The generalization is that if <tt>[keys_first1, keys_last1)</tt> contains \c m elements that are
- *  equivalent to each other and <tt>[keys_first2, keys_last2)</tt> contains \c n elements that are
- *  equivalent to them, then <tt>|m - n|</tt> of those elements shall be copied to the output
- *  range: the last <tt>m - n</tt> elements from <tt>[keys_first1, keys_last1)</tt> if <tt>m > n</tt>, and
- *  the last <tt>n - m</tt> of these elements from <tt>[keys_first2, keys_last2)</tt> if <tt>m < n</tt>.
- *
- *  Each time a key element is copied from <tt>[keys_first1, keys_last1)</tt> or
- *  <tt>[keys_first2, keys_last2)</tt> to the keys output range, the
- *  corresponding value element is copied from the corresponding values input range (beginning at
- *  \p values_first1 or \p values_first2) to the values output range.
- *
- *  This version of \p set_symmetric_difference_by_key compares key elements using a function object \c comp.
- *
- *  \param keys_first1 The beginning of the first input range of keys.
- *  \param keys_last1 The end of the first input range of keys.
- *  \param keys_first2 The beginning of the second input range of keys.
- *  \param keys_last2 The end of the second input range of keys.
- *  \param values_first1 The beginning of the first input range of values.
- *  \param values_first2 The beginning of the second input range of values.
- *  \param keys_result The beginning of the output range of keys.
- *  \param values_result The beginning of the output range of values.
- *  \param comp Comparison operator.
- *  \return A \p pair \c p such that <tt>p.first</tt> is the end of the output range of keys,
- *          and such that <tt>p.second</tt> is the end of the output range of values.
- *
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator1 and \p InputIterator2 have the same \c value_type,
- *          \p InputIterator1's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator2 and \p InputIterator1 have the same \c value_type,
- *          \p InputIterator2's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types.
- *  \tparam InputIterator3 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types.
- *  \tparam InputIterator4 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator4's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types.
- *  \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam StrictWeakCompare is a model of <a href="http://www.sgi.com/tech/stl/StrictWeakOrdering.html">Strict Weak Ordering</a>.
- *
- *  \pre The ranges <tt>[keys_first1, keys_last1)</tt> and <tt>[keys_first2, keys_last2)</tt> shall be sorted with respect to \p comp.
- *  \pre The resulting ranges shall not overlap with any input range.
- *
- *  The following code snippet demonstrates how to use \p set_symmetric_difference_by_key to compute the
- *  symmetric difference of two sets of integers sorted in descending order with their values.
- *
- *  \code
- *  #include <thrust/set_operations.h>
- *  #include <thrust/functional.h>
- *  ...
- *  int A_keys[7] = {7, 6, 4, 2, 2, 1, 0};
- *  int A_vals[7] = {0, 0, 0, 0, 0, 0, 0};
- *
- *  int B_keys[5] = {8, 5, 2, 1, 1};
- *  int B_vals[5] = {1, 1, 1, 1, 1};
- *
- *  int keys_result[8];
- *  int vals_result[8];
- *
- *  thrust::pair<int*,int*> end = thrust::set_symmetric_difference_by_key(A_keys, A_keys + 7, B_keys, B_keys + 5, A_vals, B_vals, keys_result, vals_result, thrust::greater<int>());
- *  // keys_result is now {8, 7, 6, 5, 4, 2, 1, 0}
- *  // vals_result is now {1, 0, 0, 1, 0, 0, 1, 0}
- *  \endcode
- *
- *  \see \p set_union_by_key
- *  \see \p set_intersection_by_key
- *  \see \p set_difference_by_key
- *  \see \p sort_by_key
- *  \see \p is_sorted
- */
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename InputIterator4,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename StrictWeakCompare>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    set_symmetric_difference_by_key(InputIterator1                             keys_first1,
-                                    InputIterator1                             keys_last1,
-                                    InputIterator2                             keys_first2,
-                                    InputIterator2                             keys_last2,
-                                    InputIterator3                             values_first1,
-                                    InputIterator4                             values_first2,
-                                    OutputIterator1                            keys_result,
-                                    OutputIterator2                            values_result,
-                                    StrictWeakCompare                          comp);
-
-
-/*! \p set_union_by_key performs a key-value union operation from set theory.
- *  \p set_union_by_key constructs a sorted range that is the union of the sorted
- *  ranges <tt>[keys_first1, keys_last1)</tt> and <tt>[keys_first2, keys_last2)</tt>. Associated
- *  with each element from the input and output key ranges is a value element. The associated input
- *  value ranges need not be sorted.
- *
- *  In the simplest case, \p set_union_by_key performs the "union" operation from set theory:
- *  the output range contains a copy of every element that is contained in
- *  <tt>[keys_first1, keys_last1)</tt>, <tt>[keys_first2, keys_last2)</tt>, or both. The general case
- *  is more complicated, because the input ranges may contain duplicate elements.
- *  The generalization is that if <tt>[keys_first1, keys_last1)</tt> contains \c m elements
- *  that are equivalent to each other and if <tt>[keys_first2, keys_last2)</tt> contains \c n
- *  elements that are equivalent to them, then all \c m elements from the first
- *  range shall be copied to the output range, in order, and then <tt>max(n - m, 0)</tt>
- *  elements from the second range shall be copied to the output, in order.
- *
- *  Each time a key element is copied from <tt>[keys_first1, keys_last1)</tt> or
- *  <tt>[keys_first2, keys_last2)</tt> to the keys output range, the
- *  corresponding value element is copied from the corresponding values input range (beginning at
- *  \p values_first1 or \p values_first2) to the values output range.
- *
- *  This version of \p set_union_by_key compares key elements using \c operator<.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param keys_first1 The beginning of the first input range of keys.
- *  \param keys_last1 The end of the first input range of keys.
- *  \param keys_first2 The beginning of the second input range of keys.
- *  \param keys_last2 The end of the second input range of keys.
- *  \param values_first1 The beginning of the first input range of values.
- *  \param values_first2 The beginning of the second input range of values.
- *  \param keys_result The beginning of the output range of keys.
- *  \param values_result The beginning of the output range of values.
- *  \return A \p pair \c p such that <tt>p.first</tt> is the end of the output range of keys,
- *          and such that <tt>p.second</tt> is the end of the output range of values.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator1 and \p InputIterator2 have the same \c value_type,
- *          \p InputIterator1's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator2 and \p InputIterator1 have the same \c value_type,
- *          \p InputIterator2's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types.
- *  \tparam InputIterator3 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types.
- *  \tparam InputIterator4 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator4's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types.
- *  \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *
- *  \pre The ranges <tt>[keys_first1, keys_last1)</tt> and <tt>[keys_first2, keys_last2)</tt> shall be sorted with respect to <tt>operator<</tt>.
- *  \pre The resulting ranges shall not overlap with any input range.
- *
- *  The following code snippet demonstrates how to use \p set_union_by_key to compute the
- *  union of two sets of integers sorted in ascending order with their values using the
- *  \p thrust::host execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/set_operations.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  int A_keys[7] = {0, 2, 4, 6, 8, 10, 12};
- *  int A_vals[7] = {0, 0, 0, 0, 0,  0,  0};
- *
- *  int B_keys[5] = {1, 3, 5, 7, 9};
- *  int B_vals[5] = {1, 1, 1, 1, 1};
- *
- *  int keys_result[12];
- *  int vals_result[12];
- *
- *  thrust::pair<int*,int*> end = thrust::set_union_by_key(thrust::host, A_keys, A_keys + 7, B_keys, B_keys + 5, A_vals, B_vals, keys_result, vals_result);
- *  // keys_result is now {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12}
- *  // vals_result is now {0, 1, 0, 1, 0, 1, 0, 1, 0, 1,  0,  0}
- *  \endcode
- *
- *  \see \p set_symmetric_difference_by_key
- *  \see \p set_intersection_by_key
- *  \see \p set_difference_by_key
- *  \see \p sort_by_key
- *  \see \p is_sorted
- */
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename InputIterator4,
-         typename OutputIterator1,
-         typename OutputIterator2>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    set_union_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                     InputIterator1                                              keys_first1,
-                     InputIterator1                                              keys_last1,
-                     InputIterator2                                              keys_first2,
-                     InputIterator2                                              keys_last2,
-                     InputIterator3                                              values_first1,
-                     InputIterator4                                              values_first2,
-                     OutputIterator1                                             keys_result,
-                     OutputIterator2                                             values_result);
-
-
-/*! \p set_union_by_key performs a key-value union operation from set theory.
- *  \p set_union_by_key constructs a sorted range that is the union of the sorted
- *  ranges <tt>[keys_first1, keys_last1)</tt> and <tt>[keys_first2, keys_last2)</tt>. Associated
- *  with each element from the input and output key ranges is a value element. The associated input
- *  value ranges need not be sorted.
- *
- *  In the simplest case, \p set_union_by_key performs the "union" operation from set theory:
- *  the output range contains a copy of every element that is contained in
- *  <tt>[keys_first1, keys_last1)</tt>, <tt>[keys_first2, keys_last2)</tt>, or both. The general case
- *  is more complicated, because the input ranges may contain duplicate elements.
- *  The generalization is that if <tt>[keys_first1, keys_last1)</tt> contains \c m elements
- *  that are equivalent to each other and if <tt>[keys_first2, keys_last2)</tt> contains \c n
- *  elements that are equivalent to them, then all \c m elements from the first
- *  range shall be copied to the output range, in order, and then <tt>max(n - m, 0)</tt>
- *  elements from the second range shall be copied to the output, in order.
- *
- *  Each time a key element is copied from <tt>[keys_first1, keys_last1)</tt> or
- *  <tt>[keys_first2, keys_last2)</tt> to the keys output range, the
- *  corresponding value element is copied from the corresponding values input range (beginning at
- *  \p values_first1 or \p values_first2) to the values output range.
- *
- *  This version of \p set_union_by_key compares key elements using \c operator<.
- *
- *  \param keys_first1 The beginning of the first input range of keys.
- *  \param keys_last1 The end of the first input range of keys.
- *  \param keys_first2 The beginning of the second input range of keys.
- *  \param keys_last2 The end of the second input range of keys.
- *  \param values_first1 The beginning of the first input range of values.
- *  \param values_first2 The beginning of the second input range of values.
- *  \param keys_result The beginning of the output range of keys.
- *  \param values_result The beginning of the output range of values.
- *  \return A \p pair \c p such that <tt>p.first</tt> is the end of the output range of keys,
- *          and such that <tt>p.second</tt> is the end of the output range of values.
- *
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator1 and \p InputIterator2 have the same \c value_type,
- *          \p InputIterator1's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator2 and \p InputIterator1 have the same \c value_type,
- *          \p InputIterator2's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types.
- *  \tparam InputIterator3 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types.
- *  \tparam InputIterator4 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator4's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types.
- *  \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *
- *  \pre The ranges <tt>[keys_first1, keys_last1)</tt> and <tt>[keys_first2, keys_last2)</tt> shall be sorted with respect to <tt>operator<</tt>.
- *  \pre The resulting ranges shall not overlap with any input range.
- *
- *  The following code snippet demonstrates how to use \p set_union_by_key to compute the
- *  union of two sets of integers sorted in ascending order with their values.
- *
- *  \code
- *  #include <thrust/set_operations.h>
- *  ...
- *  int A_keys[7] = {0, 2, 4, 6, 8, 10, 12};
- *  int A_vals[7] = {0, 0, 0, 0, 0,  0,  0};
- *
- *  int B_keys[5] = {1, 3, 5, 7, 9};
- *  int B_vals[5] = {1, 1, 1, 1, 1};
- *
- *  int keys_result[12];
- *  int vals_result[12];
- *
- *  thrust::pair<int*,int*> end = thrust::set_union_by_key(A_keys, A_keys + 7, B_keys, B_keys + 5, A_vals, B_vals, keys_result, vals_result);
- *  // keys_result is now {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12}
- *  // vals_result is now {0, 1, 0, 1, 0, 1, 0, 1, 0, 1,  0,  0}
- *  \endcode
- *
- *  \see \p set_symmetric_difference_by_key
- *  \see \p set_intersection_by_key
- *  \see \p set_difference_by_key
- *  \see \p sort_by_key
- *  \see \p is_sorted
- */
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename InputIterator4,
-         typename OutputIterator1,
-         typename OutputIterator2>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    set_union_by_key(InputIterator1                             keys_first1,
-                     InputIterator1                             keys_last1,
-                     InputIterator2                             keys_first2,
-                     InputIterator2                             keys_last2,
-                     InputIterator3                             values_first1,
-                     InputIterator4                             values_first2,
-                     OutputIterator1                            keys_result,
-                     OutputIterator2                            values_result);
-
-
-/*! \p set_union_by_key performs a key-value union operation from set theory.
- *  \p set_union_by_key constructs a sorted range that is the union of the sorted
- *  ranges <tt>[keys_first1, keys_last1)</tt> and <tt>[keys_first2, keys_last2)</tt>. Associated
- *  with each element from the input and output key ranges is a value element. The associated input
- *  value ranges need not be sorted.
- *
- *  In the simplest case, \p set_union_by_key performs the "union" operation from set theory:
- *  the output range contains a copy of every element that is contained in
- *  <tt>[keys_first1, keys_last1)</tt>, <tt>[keys_first2, keys_last2)</tt>, or both. The general case
- *  is more complicated, because the input ranges may contain duplicate elements.
- *  The generalization is that if <tt>[keys_first1, keys_last1)</tt> contains \c m elements
- *  that are equivalent to each other and if <tt>[keys_first2, keys_last2)</tt> contains \c n
- *  elements that are equivalent to them, then all \c m elements from the first
- *  range shall be copied to the output range, in order, and then <tt>max(n - m, 0)</tt>
- *  elements from the second range shall be copied to the output, in order.
- *
- *  Each time a key element is copied from <tt>[keys_first1, keys_last1)</tt> or
- *  <tt>[keys_first2, keys_last2)</tt> to the keys output range, the
- *  corresponding value element is copied from the corresponding values input range (beginning at
- *  \p values_first1 or \p values_first2) to the values output range.
- *
- *  This version of \p set_union_by_key compares key elements using a function object \c comp.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param keys_first1 The beginning of the first input range of keys.
- *  \param keys_last1 The end of the first input range of keys.
- *  \param keys_first2 The beginning of the second input range of keys.
- *  \param keys_last2 The end of the second input range of keys.
- *  \param values_first1 The beginning of the first input range of values.
- *  \param values_first2 The beginning of the second input range of values.
- *  \param keys_result The beginning of the output range of keys.
- *  \param values_result The beginning of the output range of values.
- *  \param comp Comparison operator.
- *  \return A \p pair \c p such that <tt>p.first</tt> is the end of the output range of keys,
- *          and such that <tt>p.second</tt> is the end of the output range of values.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator1 and \p InputIterator2 have the same \c value_type,
- *          \p InputIterator1's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator2 and \p InputIterator1 have the same \c value_type,
- *          \p InputIterator2's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types.
- *  \tparam InputIterator3 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types.
- *  \tparam InputIterator4 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator4's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types.
- *  \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam StrictWeakCompare is a model of <a href="http://www.sgi.com/tech/stl/StrictWeakOrdering.html">Strict Weak Ordering</a>.
- *
- *  \pre The ranges <tt>[keys_first1, keys_last1)</tt> and <tt>[keys_first2, keys_last2)</tt> shall be sorted with respect to \p comp.
- *  \pre The resulting ranges shall not overlap with any input range.
- *
- *  The following code snippet demonstrates how to use \p set_union_by_key to compute the
- *  union of two sets of integers sorted in descending order with their values using the
- *  \p thrust::host execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/set_operations.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  int A_keys[7] = {12, 10, 8, 6, 4, 2, 0};
- *  int A_vals[7] = { 0,  0, 0, 0, 0, 0, 0};
- *
- *  int B_keys[5] = {9, 7, 5, 3, 1};
- *  int B_vals[5] = {1, 1, 1, 1, 1};
- *
- *  int keys_result[12];
- *  int vals_result[12];
- *
- *  thrust::pair<int*,int*> end = thrust::set_union_by_key(thrust::host, A_keys, A_keys + 7, B_keys, B_keys + 5, A_vals, B_vals, keys_result, vals_result, thrust::greater<int>());
- *  // keys_result is now {12, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}
- *  // vals_result is now { 0,  0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0}
- *  \endcode
- *
- *  \see \p set_symmetric_difference_by_key
- *  \see \p set_intersection_by_key
- *  \see \p set_difference_by_key
- *  \see \p sort_by_key
- *  \see \p is_sorted
- */
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename InputIterator4,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename StrictWeakCompare>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    set_union_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                     InputIterator1                                              keys_first1,
-                     InputIterator1                                              keys_last1,
-                     InputIterator2                                              keys_first2,
-                     InputIterator2                                              keys_last2,
-                     InputIterator3                                              values_first1,
-                     InputIterator4                                              values_first2,
-                     OutputIterator1                                             keys_result,
-                     OutputIterator2                                             values_result,
-                     StrictWeakCompare                                           comp);
-
-
-/*! \p set_union_by_key performs a key-value union operation from set theory.
- *  \p set_union_by_key constructs a sorted range that is the union of the sorted
- *  ranges <tt>[keys_first1, keys_last1)</tt> and <tt>[keys_first2, keys_last2)</tt>. Associated
- *  with each element from the input and output key ranges is a value element. The associated input
- *  value ranges need not be sorted.
- *
- *  In the simplest case, \p set_union_by_key performs the "union" operation from set theory:
- *  the output range contains a copy of every element that is contained in
- *  <tt>[keys_first1, keys_last1)</tt>, <tt>[keys_first2, keys_last2)</tt>, or both. The general case
- *  is more complicated, because the input ranges may contain duplicate elements.
- *  The generalization is that if <tt>[keys_first1, keys_last1)</tt> contains \c m elements
- *  that are equivalent to each other and if <tt>[keys_first2, keys_last2)</tt> contains \c n
- *  elements that are equivalent to them, then all \c m elements from the first
- *  range shall be copied to the output range, in order, and then <tt>max(n - m, 0)</tt>
- *  elements from the second range shall be copied to the output, in order.
- *
- *  Each time a key element is copied from <tt>[keys_first1, keys_last1)</tt> or
- *  <tt>[keys_first2, keys_last2)</tt> is copied to the keys output range, the
- *  corresponding value element is copied from the corresponding values input range (beginning at
- *  \p values_first1 or \p values_first2) to the values output range.
- *
- *  This version of \p set_union_by_key compares key elements using a function object \c comp.
- *
- *  \param keys_first1 The beginning of the first input range of keys.
- *  \param keys_last1 The end of the first input range of keys.
- *  \param keys_first2 The beginning of the second input range of keys.
- *  \param keys_last2 The end of the second input range of keys.
- *  \param values_first1 The beginning of the first input range of values.
- *  \param values_first2 The beginning of the first input range of values.
- *  \param keys_result The beginning of the output range of keys.
- *  \param values_result The beginning of the output range of values.
- *  \param comp Comparison operator.
- *  \return A \p pair \c p such that <tt>p.first</tt> is the end of the output range of keys,
- *          and such that <tt>p.second</tt> is the end of the output range of values.
- *
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator1 and \p InputIterator2 have the same \c value_type,
- *          \p InputIterator1's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          \p InputIterator2 and \p InputIterator1 have the same \c value_type,
- *          \p InputIterator2's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a>,
- *          the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the <a href="http://www.sgi.com/tech/stl/LessThanComparable">LessThan Comparable</a> requirements,
- *          and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types.
- *  \tparam InputIterator3 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types.
- *  \tparam InputIterator4 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator4's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types.
- *  \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam StrictWeakCompare is a model of <a href="http://www.sgi.com/tech/stl/StrictWeakOrdering.html">Strict Weak Ordering</a>.
- *
- *  \pre The ranges <tt>[keys_first1, keys_last1)</tt> and <tt>[keys_first2, keys_last2)</tt> shall be sorted with respect to \p comp.
- *  \pre The resulting ranges shall not overlap with any input range.
- *
- *  The following code snippet demonstrates how to use \p set_symmetric_difference_by_key to compute the
- *  symmetric difference of two sets of integers sorted in descending order with their values.
- *
- *  \code
- *  #include <thrust/set_operations.h>
- *  #include <thrust/functional.h>
- *  ...
- *  int A_keys[6] = {12, 10, 8, 6, 4, 2, 0};
- *  int A_vals[6] = { 0,  0, 0, 0, 0, 0, 0};
- *
- *  int B_keys[5] = {9, 7, 5, 3, 1};
- *  int B_vals[5] = {1, 1, 1, 1, 1};
- *
- *  int keys_result[11];
- *  int vals_result[11];
- *
- *  thrust::pair<int*,int*> end = thrust::set_symmetric_difference_by_key(A_keys, A_keys + 6, B_keys, B_keys + 5, A_vals, B_vals, keys_result, vals_result, thrust::greater<int>());
- *  // keys_result is now {12, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}
- *  // vals_result is now { 0,  1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0}
- *  \endcode
- *
- *  \see \p set_symmetric_difference_by_key
- *  \see \p set_intersection_by_key
- *  \see \p set_difference_by_key
- *  \see \p sort_by_key
- *  \see \p is_sorted
- */
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename InputIterator4,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename StrictWeakCompare>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    set_union_by_key(InputIterator1                             keys_first1,
-                     InputIterator1                             keys_last1,
-                     InputIterator2                             keys_first2,
-                     InputIterator2                             keys_last2,
-                     InputIterator3                             values_first1,
-                     InputIterator4                             values_first2,
-                     OutputIterator1                            keys_result,
-                     OutputIterator2                            values_result,
-                     StrictWeakCompare                          comp);
-
-
-/*! \} // end set_operations
- */
-
-
-} // end thrust
-
-#include <thrust/detail/set_operations.inl>
-
diff --git a/compat/thrust/sort.h b/compat/thrust/sort.h
deleted file mode 100644
index e8edfcd876..0000000000
--- a/compat/thrust/sort.h
+++ /dev/null
@@ -1,1349 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file sort.h
- *  \brief Functions for reorganizing ranges into sorted order
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/execution_policy.h>
-
-namespace thrust
-{
-
-
-/*! \addtogroup sorting
- *  \ingroup algorithms
- *  \{
- */
-
-
-/*! \p sort sorts the elements in <tt>[first, last)</tt> into
- *  ascending order, meaning that if \c i and \c j are any two valid
- *  iterators in <tt>[first, last)</tt> such that \c i precedes \c j,
- *  then \c *j is not less than \c *i. Note: \c sort is not guaranteed
- *  to be stable. That is, suppose that \c *i and \c *j are equivalent:
- *  neither one is less than the other. It is not guaranteed that the
- *  relative order of these two elements will be preserved by \p sort.
- *
- *  This version of \p sort compares objects using \c operator<.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the sequence.
- *  \param last The end of the sequence.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam RandomAccessIterator is a model of <a href="http://www.sgi.com/tech/stl/RandomAccessIterator.html">Random Access Iterator</a>,
- *          \p RandomAccessIterator is mutable,
- *          and \p RandomAccessIterator's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThan Comparable</a>,
- *          and the ordering relation on \p RandomAccessIterator's \c value_type is a <em>strict weak ordering</em>, as defined in the
- *          <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThan Comparable</a> requirements.
- *
- *  The following code snippet demonstrates how to use \p sort to sort
- *  a sequence of integers using the \p thrust::host execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/sort.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  const int N = 6;
- *  int A[N] = {1, 4, 2, 8, 5, 7};
- *  thrust::sort(thrust::host, A, A + N);
- *  // A is now {1, 2, 4, 5, 7, 8}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/sort.html
- *  \see \p stable_sort
- *  \see \p sort_by_key
- */
-template<typename DerivedPolicy, typename RandomAccessIterator>
-  void sort(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-            RandomAccessIterator first,
-            RandomAccessIterator last);
-
-
-/*! \p sort sorts the elements in <tt>[first, last)</tt> into
- *  ascending order, meaning that if \c i and \c j are any two valid
- *  iterators in <tt>[first, last)</tt> such that \c i precedes \c j,
- *  then \c *j is not less than \c *i. Note: \c sort is not guaranteed
- *  to be stable. That is, suppose that \c *i and \c *j are equivalent:
- *  neither one is less than the other. It is not guaranteed that the
- *  relative order of these two elements will be preserved by \p sort.
- *
- *  This version of \p sort compares objects using \c operator<.
- *
- *  \param first The beginning of the sequence.
- *  \param last The end of the sequence.
- *
- *  \tparam RandomAccessIterator is a model of <a href="http://www.sgi.com/tech/stl/RandomAccessIterator.html">Random Access Iterator</a>,
- *          \p RandomAccessIterator is mutable,
- *          and \p RandomAccessIterator's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThan Comparable</a>,
- *          and the ordering relation on \p RandomAccessIterator's \c value_type is a <em>strict weak ordering</em>, as defined in the
- *          <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThan Comparable</a> requirements.
- *
- *  The following code snippet demonstrates how to use \p sort to sort
- *  a sequence of integers.
- *
- *  \code
- *  #include <thrust/sort.h>
- *  ...
- *  const int N = 6;
- *  int A[N] = {1, 4, 2, 8, 5, 7};
- *  thrust::sort(A, A + N);
- *  // A is now {1, 2, 4, 5, 7, 8}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/sort.html
- *  \see \p stable_sort
- *  \see \p sort_by_key
- */
-template<typename RandomAccessIterator>
-  void sort(RandomAccessIterator first,
-            RandomAccessIterator last);
-
-
-/*! \p sort sorts the elements in <tt>[first, last)</tt> into
- *  ascending order, meaning that if \c i and \c j are any two valid
- *  iterators in <tt>[first, last)</tt> such that \c i precedes \c j,
- *  then \c *j is not less than \c *i. Note: \c sort is not guaranteed
- *  to be stable. That is, suppose that \c *i and \c *j are equivalent:
- *  neither one is less than the other. It is not guaranteed that the
- *  relative order of these two elements will be preserved by \p sort.
- *
- *  This version of \p sort compares objects using a function object
- *  \p comp.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the sequence.
- *  \param last The end of the sequence.
- *  \param comp  Comparison operator.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam RandomAccessIterator is a model of <a href="http://www.sgi.com/tech/stl/RandomAccessIterator.html">Random Access Iterator</a>,
- *          \p RandomAccessIterator is mutable,
- *          and \p RandomAccessIterator's \c value_type is convertible to \p StrictWeakOrdering's
- *          \c first_argument_type and \c second_argument_type.
- *  \tparam StrictWeakOrdering is a model of <a href="http://www.sgi.com/tech/stl/StrictWeakOrdering.html">Strict Weak Ordering</a>.
- *
- *  The following code demonstrates how to sort integers in descending order
- *  using the greater<int> comparison operator using the \p thrust::host execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/sort.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  const int N = 6;
- *  int A[N] = {1, 4, 2, 8, 5, 7};
- *  thrust::sort(thrust::host, A, A + N, thrust::greater<int>());
- *  // A is now {8, 7, 5, 4, 2, 1};
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/sort.html
- *  \see \p stable_sort
- *  \see \p sort_by_key
- */
-template<typename DerivedPolicy,
-         typename RandomAccessIterator,
-         typename StrictWeakOrdering>
-  void sort(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-            RandomAccessIterator first,
-            RandomAccessIterator last,
-            StrictWeakOrdering comp);
-
-
-/*! \p sort sorts the elements in <tt>[first, last)</tt> into
- *  ascending order, meaning that if \c i and \c j are any two valid
- *  iterators in <tt>[first, last)</tt> such that \c i precedes \c j,
- *  then \c *j is not less than \c *i. Note: \c sort is not guaranteed
- *  to be stable. That is, suppose that \c *i and \c *j are equivalent:
- *  neither one is less than the other. It is not guaranteed that the
- *  relative order of these two elements will be preserved by \p sort.
- *
- *  This version of \p sort compares objects using a function object
- *  \p comp.
- *
- *  \param first The beginning of the sequence.
- *  \param last The end of the sequence.
- *  \param comp  Comparison operator.
- *
- *  \tparam RandomAccessIterator is a model of <a href="http://www.sgi.com/tech/stl/RandomAccessIterator.html">Random Access Iterator</a>,
- *          \p RandomAccessIterator is mutable,
- *          and \p RandomAccessIterator's \c value_type is convertible to \p StrictWeakOrdering's
- *          \c first_argument_type and \c second_argument_type.
- *  \tparam StrictWeakOrdering is a model of <a href="http://www.sgi.com/tech/stl/StrictWeakOrdering.html">Strict Weak Ordering</a>.
- *
- *  The following code demonstrates how to sort integers in descending order
- *  using the greater<int> comparison operator.
- *
- *  \code
- *  #include <thrust/sort.h>
- *  #include <thrust/functional.h>
- *  ...
- *  const int N = 6;
- *  int A[N] = {1, 4, 2, 8, 5, 7};
- *  thrust::sort(A, A + N, thrust::greater<int>());
- *  // A is now {8, 7, 5, 4, 2, 1};
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/sort.html
- *  \see \p stable_sort
- *  \see \p sort_by_key
- */
-template<typename RandomAccessIterator,
-         typename StrictWeakOrdering>
-  void sort(RandomAccessIterator first,
-            RandomAccessIterator last,
-            StrictWeakOrdering comp);
-
-
-/*! \p stable_sort is much like \c sort: it sorts the elements in
- *  <tt>[first, last)</tt> into ascending order, meaning that if \c i
- *  and \c j are any two valid iterators in <tt>[first, last)</tt> such
- *  that \c i precedes \c j, then \c *j is not less than \c *i.
- *
- *  As the name suggests, \p stable_sort is stable: it preserves the
- *  relative ordering of equivalent elements. That is, if \c x and \c y
- *  are elements in <tt>[first, last)</tt> such that \c x precedes \c y,
- *  and if the two elements are equivalent (neither <tt>x < y</tt> nor
- *  <tt>y < x</tt>) then a postcondition of \p stable_sort is that \c x
- *  still precedes \c y.
- *
- *  This version of \p stable_sort compares objects using \c operator<.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the sequence.
- *  \param last The end of the sequence.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam RandomAccessIterator is a model of <a href="http://www.sgi.com/tech/stl/RandomAccessIterator.html">Random Access Iterator</a>,
- *          \p RandomAccessIterator is mutable,
- *          and \p RandomAccessIterator's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThan Comparable</a>,
- *          and the ordering relation on \p RandomAccessIterator's \c value_type is a <em>strict weak ordering</em>, as defined in the
- *          <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThan Comparable</a> requirements.
- *
- *  The following code snippet demonstrates how to use \p sort to sort
- *  a sequence of integers using the \p thrust::host execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/sort.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  const int N = 6;
- *  int A[N] = {1, 4, 2, 8, 5, 7};
- *  thrust::stable_sort(thrust::host, A, A + N);
- *  // A is now {1, 2, 4, 5, 7, 8}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/stable_sort.html
- *  \see \p sort
- *  \see \p stable_sort_by_key
- */
-template<typename DerivedPolicy, typename RandomAccessIterator>
-  void stable_sort(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                   RandomAccessIterator first,
-                   RandomAccessIterator last);
-
-
-/*! \p stable_sort is much like \c sort: it sorts the elements in
- *  <tt>[first, last)</tt> into ascending order, meaning that if \c i
- *  and \c j are any two valid iterators in <tt>[first, last)</tt> such
- *  that \c i precedes \c j, then \c *j is not less than \c *i.
- *
- *  As the name suggests, \p stable_sort is stable: it preserves the
- *  relative ordering of equivalent elements. That is, if \c x and \c y
- *  are elements in <tt>[first, last)</tt> such that \c x precedes \c y,
- *  and if the two elements are equivalent (neither <tt>x < y</tt> nor
- *  <tt>y < x</tt>) then a postcondition of \p stable_sort is that \c x
- *  still precedes \c y.
- *
- *  This version of \p stable_sort compares objects using \c operator<.
- *
- *  \param first The beginning of the sequence.
- *  \param last The end of the sequence.
- *
- *  \tparam RandomAccessIterator is a model of <a href="http://www.sgi.com/tech/stl/RandomAccessIterator.html">Random Access Iterator</a>,
- *          \p RandomAccessIterator is mutable,
- *          and \p RandomAccessIterator's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThan Comparable</a>,
- *          and the ordering relation on \p RandomAccessIterator's \c value_type is a <em>strict weak ordering</em>, as defined in the
- *          <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThan Comparable</a> requirements.
- *
- *  The following code snippet demonstrates how to use \p sort to sort
- *  a sequence of integers.
- *
- *  \code
- *  #include <thrust/sort.h>
- *  ...
- *  const int N = 6;
- *  int A[N] = {1, 4, 2, 8, 5, 7};
- *  thrust::stable_sort(A, A + N);
- *  // A is now {1, 2, 4, 5, 7, 8}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/stable_sort.html
- *  \see \p sort
- *  \see \p stable_sort_by_key
- */
-template<typename RandomAccessIterator>
-  void stable_sort(RandomAccessIterator first,
-                   RandomAccessIterator last);
-
-
-/*! \p stable_sort is much like \c sort: it sorts the elements in
- *  <tt>[first, last)</tt> into ascending order, meaning that if \c i
- *  and \c j are any two valid iterators in <tt>[first, last)</tt> such
- *  that \c i precedes \c j, then \c *j is not less than \c *i.
- *
- *  As the name suggests, \p stable_sort is stable: it preserves the
- *  relative ordering of equivalent elements. That is, if \c x and \c y
- *  are elements in <tt>[first, last)</tt> such that \c x precedes \c y,
- *  and if the two elements are equivalent (neither <tt>x < y</tt> nor
- *  <tt>y < x</tt>) then a postcondition of \p stable_sort is that \c x
- *  still precedes \c y.
- *
- *  This version of \p stable_sort compares objects using a function object
- *  \p comp.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the sequence.
- *  \param last The end of the sequence.
- *  \param comp Comparison operator.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam RandomAccessIterator is a model of <a href="http://www.sgi.com/tech/stl/RandomAccessIterator.html">Random Access Iterator</a>,
- *          \p RandomAccessIterator is mutable,
- *          and \p RandomAccessIterator's \c value_type is convertible to \p StrictWeakOrdering's
- *          \c first_argument_type and \c second_argument_type.
- *  \tparam StrictWeakOrdering is a model of <a href="http://www.sgi.com/tech/stl/StrictWeakOrdering.html">Strict Weak Ordering</a>.
- *
- *  The following code demonstrates how to sort integers in descending order
- *  using the greater<int> comparison operator using the \p thrust::host execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/sort.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  const int N = 6;
- *  int A[N] = {1, 4, 2, 8, 5, 7};
- *  thrust::sort(A, A + N, thrust::greater<int>());
- *  // A is now {8, 7, 5, 4, 2, 1};
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/stable_sort.html
- *  \see \p sort
- *  \see \p stable_sort_by_key
- */
-template<typename DerivedPolicy,
-         typename RandomAccessIterator,
-         typename StrictWeakOrdering>
-  void stable_sort(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                   RandomAccessIterator first,
-                   RandomAccessIterator last,
-                   StrictWeakOrdering comp);
-
-
-/*! \p stable_sort is much like \c sort: it sorts the elements in
- *  <tt>[first, last)</tt> into ascending order, meaning that if \c i
- *  and \c j are any two valid iterators in <tt>[first, last)</tt> such
- *  that \c i precedes \c j, then \c *j is not less than \c *i.
- *
- *  As the name suggests, \p stable_sort is stable: it preserves the
- *  relative ordering of equivalent elements. That is, if \c x and \c y
- *  are elements in <tt>[first, last)</tt> such that \c x precedes \c y,
- *  and if the two elements are equivalent (neither <tt>x < y</tt> nor
- *  <tt>y < x</tt>) then a postcondition of \p stable_sort is that \c x
- *  still precedes \c y.
- *
- *  This version of \p stable_sort compares objects using a function object
- *  \p comp.
- *
- *  \param first The beginning of the sequence.
- *  \param last The end of the sequence.
- *  \param comp Comparison operator.
- *
- *  \tparam RandomAccessIterator is a model of <a href="http://www.sgi.com/tech/stl/RandomAccessIterator.html">Random Access Iterator</a>,
- *          \p RandomAccessIterator is mutable,
- *          and \p RandomAccessIterator's \c value_type is convertible to \p StrictWeakOrdering's
- *          \c first_argument_type and \c second_argument_type.
- *  \tparam StrictWeakOrdering is a model of <a href="http://www.sgi.com/tech/stl/StrictWeakOrdering.html">Strict Weak Ordering</a>.
- *
- *  The following code demonstrates how to sort integers in descending order
- *  using the greater<int> comparison operator.
- *
- *  \code
- *  #include <thrust/sort.h>
- *  #include <thrust/functional.h>
- *  ...
- *  const int N = 6;
- *  int A[N] = {1, 4, 2, 8, 5, 7};
- *  thrust::sort(A, A + N, thrust::greater<int>());
- *  // A is now {8, 7, 5, 4, 2, 1};
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/stable_sort.html
- *  \see \p sort
- *  \see \p stable_sort_by_key
- */
-template<typename RandomAccessIterator,
-         typename StrictWeakOrdering>
-  void stable_sort(RandomAccessIterator first,
-                   RandomAccessIterator last,
-                   StrictWeakOrdering comp);
-
-
-///////////////
-// Key Value //
-///////////////
-
-
-/*! \p sort_by_key performs a key-value sort. That is, \p sort_by_key sorts the
- *  elements in <tt>[keys_first, keys_last)</tt> and <tt>[values_first,
- *  values_first + (keys_last - keys_first))</tt> into ascending key order,
- *  meaning that if \c i and \c j are any two valid iterators in <tt>[keys_first,
- *  keys_last)</tt> such that \c i precedes \c j, and \c p and \c q are iterators
- *  in <tt>[values_first, values_first + (keys_last - keys_first))</tt>
- *  corresponding to \c i and \c j respectively, then \c *j is not less than
- *  \c *i.
- *
- *  Note: \c sort_by_key is not guaranteed to be stable. That is, suppose that
- *  \c *i and \c *j are equivalent: neither one is less than the other. It is not
- *  guaranteed that the relative order of these two keys or the relative
- *  order of their corresponding values will be preserved by \p sort_by_key.
- *
- *  This version of \p sort_by_key compares key objects using \c operator<.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param keys_first The beginning of the key sequence.
- *  \param keys_last The end of the key sequence.
- *  \param values_first The beginning of the value sequence.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam RandomAccessIterator1 is a model of <a href="http://www.sgi.com/tech/stl/RandomAccessIterator.html">Random Access Iterator</a>,
- *          \p RandomAccessIterator1 is mutable,
- *          and \p RandomAccessIterator1's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThan Comparable</a>,
- *          and the ordering relation on \p RandomAccessIterator1's \c value_type is a <em>strict weak ordering</em>, as defined in the
- *          <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThan Comparable</a> requirements.
- *  \tparam RandomAccessIterator2 is a model of <a href="http://www.sgi.com/tech/stl/RandomAccessIterator.hml">Random Access Iterator</a>,
- *          and \p RandomAccessIterator2 is mutable.
- *
- *  \pre The range <tt>[keys_first, keys_last))</tt> shall not overlap the range <tt>[values_first, values_first + (keys_last - keys_first))</tt>.
- *
- *  The following code snippet demonstrates how to use \p sort_by_key to sort
- *  an array of character values using integers as sorting keys using the \p thrust::host execution policy
- *  for parallelization:
- *
- *  \code
- *  #include <thrust/sort.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  const int N = 6;
- *  int    keys[N] = {  1,   4,   2,   8,   5,   7};
- *  char values[N] = {'a', 'b', 'c', 'd', 'e', 'f'};
- *  thrust::sort_by_key(thrust::host, keys, keys + N, values);
- *  // keys is now   {  1,   2,   4,   5,   7,   8}
- *  // values is now {'a', 'c', 'b', 'e', 'f', 'd'}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/sort.html
- *  \see \p stable_sort_by_key
- *  \see \p sort
- */
-template<typename DerivedPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2>
-  void sort_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                   RandomAccessIterator1 keys_first,
-                   RandomAccessIterator1 keys_last,
-                   RandomAccessIterator2 values_first);
-
-
-/*! \p sort_by_key performs a key-value sort. That is, \p sort_by_key sorts the
- *  elements in <tt>[keys_first, keys_last)</tt> and <tt>[values_first,
- *  values_first + (keys_last - keys_first))</tt> into ascending key order,
- *  meaning that if \c i and \c j are any two valid iterators in <tt>[keys_first,
- *  keys_last)</tt> such that \c i precedes \c j, and \c p and \c q are iterators
- *  in <tt>[values_first, values_first + (keys_last - keys_first))</tt>
- *  corresponding to \c i and \c j respectively, then \c *j is not less than
- *  \c *i.
- *
- *  Note: \c sort_by_key is not guaranteed to be stable. That is, suppose that
- *  \c *i and \c *j are equivalent: neither one is less than the other. It is not
- *  guaranteed that the relative order of these two keys or the relative
- *  order of their corresponding values will be preserved by \p sort_by_key.
- *
- *  This version of \p sort_by_key compares key objects using \c operator<.
- *
- *  \param keys_first The beginning of the key sequence.
- *  \param keys_last The end of the key sequence.
- *  \param values_first The beginning of the value sequence.
- *
- *  \tparam RandomAccessIterator1 is a model of <a href="http://www.sgi.com/tech/stl/RandomAccessIterator.html">Random Access Iterator</a>,
- *          \p RandomAccessIterator1 is mutable,
- *          and \p RandomAccessIterator1's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThan Comparable</a>,
- *          and the ordering relation on \p RandomAccessIterator1's \c value_type is a <em>strict weak ordering</em>, as defined in the
- *          <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThan Comparable</a> requirements.
- *  \tparam RandomAccessIterator2 is a model of <a href="http://www.sgi.com/tech/stl/RandomAccessIterator.hml">Random Access Iterator</a>,
- *          and \p RandomAccessIterator2 is mutable.
- *
- *  \pre The range <tt>[keys_first, keys_last))</tt> shall not overlap the range <tt>[values_first, values_first + (keys_last - keys_first))</tt>.
- *
- *  The following code snippet demonstrates how to use \p sort_by_key to sort
- *  an array of character values using integers as sorting keys.
- *
- *  \code
- *  #include <thrust/sort.h>
- *  ...
- *  const int N = 6;
- *  int    keys[N] = {  1,   4,   2,   8,   5,   7};
- *  char values[N] = {'a', 'b', 'c', 'd', 'e', 'f'};
- *  thrust::sort_by_key(keys, keys + N, values);
- *  // keys is now   {  1,   2,   4,   5,   7,   8}
- *  // values is now {'a', 'c', 'b', 'e', 'f', 'd'}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/sort.html
- *  \see \p stable_sort_by_key
- *  \see \p sort
- */
-template<typename RandomAccessIterator1,
-         typename RandomAccessIterator2>
-  void sort_by_key(RandomAccessIterator1 keys_first,
-                   RandomAccessIterator1 keys_last,
-                   RandomAccessIterator2 values_first);
-
-
-/*! \p sort_by_key performs a key-value sort. That is, \p sort_by_key sorts the
- *  elements in <tt>[keys_first, keys_last)</tt> and <tt>[values_first,
- *  values_first + (keys_last - keys_first))</tt> into ascending key order,
- *  meaning that if \c i and \c j are any two valid iterators in <tt>[keys_first,
- *  keys_last)</tt> such that \c i precedes \c j, and \c p and \c q are iterators
- *  in <tt>[values_first, values_first + (keys_last - keys_first))</tt>
- *  corresponding to \c i and \c j respectively, then \c *j is not less than
- *  \c *i.
- *
- *  Note: \c sort_by_key is not guaranteed to be stable. That is, suppose that
- *  \c *i and \c *j are equivalent: neither one is less than the other. It is not
- *  guaranteed that the relative order of these two keys or the relative
- *  order of their corresponding values will be preserved by \p sort_by_key.
- *
- *  This version of \p sort_by_key compares key objects using a function object
- *  \c comp.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param keys_first The beginning of the key sequence.
- *  \param keys_last The end of the key sequence.
- *  \param values_first The beginning of the value sequence.
- *  \param comp Comparison operator.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam RandomAccessIterator1 is a model of <a href="http://www.sgi.com/tech/stl/RandomAccessIterator.html">Random Access Iterator</a>,
- *          \p RandomAccessIterator1 is mutable,
- *          and \p RandomAccessIterator1's \c value_type is convertible to \p StrictWeakOrdering's
- *          \c first_argument_type and \c second_argument_type.
- *  \tparam RandomAccessIterator2 is a model of <a href="http://www.sgi.com/tech/stl/RandomAccessIterator.hml">Random Access Iterator</a>,
- *          and \p RandomAccessIterator2 is mutable.
- *  \tparam StrictWeakOrdering is a model of <a href="http://www.sgi.com/tech/stl/StrictWeakOrdering.html">Strict Weak Ordering</a>.
- *
- *  \pre The range <tt>[keys_first, keys_last))</tt> shall not overlap the range <tt>[values_first, values_first + (keys_last - keys_first))</tt>.
- *
- *  The following code snippet demonstrates how to use \p sort_by_key to sort
- *  an array of character values using integers as sorting keys using the \p thrust::host execution policy
- *  for parallelization.The keys are sorted in descending order using the <tt>greater<int></tt> comparison operator.
- *
- *  \code
- *  #include <thrust/sort.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  const int N = 6;
- *  int    keys[N] = {  1,   4,   2,   8,   5,   7};
- *  char values[N] = {'a', 'b', 'c', 'd', 'e', 'f'};
- *  thrust::sort_by_key(thrust::host, keys, keys + N, values, thrust::greater<int>());
- *  // keys is now   {  8,   7,   5,   4,   2,   1}
- *  // values is now {'d', 'f', 'e', 'b', 'c', 'a'}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/sort.html
- *  \see \p stable_sort_by_key
- *  \see \p sort
- */
-template<typename DerivedPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename StrictWeakOrdering>
-  void sort_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                   RandomAccessIterator1 keys_first,
-                   RandomAccessIterator1 keys_last,
-                   RandomAccessIterator2 values_first,
-                   StrictWeakOrdering comp);
-
-
-/*! \p sort_by_key performs a key-value sort. That is, \p sort_by_key sorts the
- *  elements in <tt>[keys_first, keys_last)</tt> and <tt>[values_first,
- *  values_first + (keys_last - keys_first))</tt> into ascending key order,
- *  meaning that if \c i and \c j are any two valid iterators in <tt>[keys_first,
- *  keys_last)</tt> such that \c i precedes \c j, and \c p and \c q are iterators
- *  in <tt>[values_first, values_first + (keys_last - keys_first))</tt>
- *  corresponding to \c i and \c j respectively, then \c *j is not less than
- *  \c *i.
- *
- *  Note: \c sort_by_key is not guaranteed to be stable. That is, suppose that
- *  \c *i and \c *j are equivalent: neither one is less than the other. It is not
- *  guaranteed that the relative order of these two keys or the relative
- *  order of their corresponding values will be preserved by \p sort_by_key.
- *
- *  This version of \p sort_by_key compares key objects using a function object
- *  \c comp.
- *
- *  \param keys_first The beginning of the key sequence.
- *  \param keys_last The end of the key sequence.
- *  \param values_first The beginning of the value sequence.
- *  \param comp Comparison operator.
- *
- *  \tparam RandomAccessIterator1 is a model of <a href="http://www.sgi.com/tech/stl/RandomAccessIterator.html">Random Access Iterator</a>,
- *          \p RandomAccessIterator1 is mutable,
- *          and \p RandomAccessIterator1's \c value_type is convertible to \p StrictWeakOrdering's
- *          \c first_argument_type and \c second_argument_type.
- *  \tparam RandomAccessIterator2 is a model of <a href="http://www.sgi.com/tech/stl/RandomAccessIterator.hml">Random Access Iterator</a>,
- *          and \p RandomAccessIterator2 is mutable.
- *  \tparam StrictWeakOrdering is a model of <a href="http://www.sgi.com/tech/stl/StrictWeakOrdering.html">Strict Weak Ordering</a>.
- *
- *  \pre The range <tt>[keys_first, keys_last))</tt> shall not overlap the range <tt>[values_first, values_first + (keys_last - keys_first))</tt>.
- *
- *  The following code snippet demonstrates how to use \p sort_by_key to sort
- *  an array of character values using integers as sorting keys.  The keys
- *  are sorted in descending order using the greater<int> comparison operator.
- *
- *  \code
- *  #include <thrust/sort.h>
- *  ...
- *  const int N = 6;
- *  int    keys[N] = {  1,   4,   2,   8,   5,   7};
- *  char values[N] = {'a', 'b', 'c', 'd', 'e', 'f'};
- *  thrust::sort_by_key(keys, keys + N, values, thrust::greater<int>());
- *  // keys is now   {  8,   7,   5,   4,   2,   1}
- *  // values is now {'d', 'f', 'e', 'b', 'c', 'a'}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/sort.html
- *  \see \p stable_sort_by_key
- *  \see \p sort
- */
-template<typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename StrictWeakOrdering>
-  void sort_by_key(RandomAccessIterator1 keys_first,
-                   RandomAccessIterator1 keys_last,
-                   RandomAccessIterator2 values_first,
-                   StrictWeakOrdering comp);
-
-
-/*! \p stable_sort_by_key performs a key-value sort. That is, \p stable_sort_by_key
- *  sorts the elements in <tt>[keys_first, keys_last)</tt> and <tt>[values_first,
- *  values_first + (keys_last - keys_first))</tt> into ascending key order,
- *  meaning that if \c i and \c j are any two valid iterators in <tt>[keys_first,
- *  keys_last)</tt> such that \c i precedes \c j, and \c p and \c q are iterators
- *  in <tt>[values_first, values_first + (keys_last - keys_first))</tt>
- *  corresponding to \c i and \c j respectively, then \c *j is not less than
- *  \c *i.
- *
- *  As the name suggests, \p stable_sort_by_key is stable: it preserves the
- *  relative ordering of equivalent elements. That is, if \c x and \c y
- *  are elements in <tt>[keys_first, keys_last)</tt> such that \c x precedes \c y,
- *  and if the two elements are equivalent (neither <tt>x < y</tt> nor
- *  <tt>y < x</tt>) then a postcondition of \p stable_sort_by_key is that \c x
- *  still precedes \c y.
- *
- *  This version of \p stable_sort_by_key compares key objects using \c operator<.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param keys_first The beginning of the key sequence.
- *  \param keys_last The end of the key sequence.
- *  \param values_first The beginning of the value sequence.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam RandomAccessIterator1 is a model of <a href="http://www.sgi.com/tech/stl/RandomAccessIterator.html">Random Access Iterator</a>,
- *          \p RandomAccessIterator1 is mutable,
- *          and \p RandomAccessIterator1's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThan Comparable</a>,
- *          and the ordering relation on \p RandomAccessIterator1's \c value_type is a <em>strict weak ordering</em>, as defined in the
- *          <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThan Comparable</a> requirements.
- *  \tparam RandomAccessIterator2 is a model of <a href="http://www.sgi.com/tech/stl/RandomAccessIterator.hml">Random Access Iterator</a>,
- *          and \p RandomAccessIterator2 is mutable.
- *
- *  \pre The range <tt>[keys_first, keys_last))</tt> shall not overlap the range <tt>[values_first, values_first + (keys_last - keys_first))</tt>.
- *
- *  The following code snippet demonstrates how to use \p stable_sort_by_key to sort
- *  an array of characters using integers as sorting keys using the \p thrust::host execution policy for
- *  parallelization:
- *
- *  \code
- *  #include <thrust/sort.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  const int N = 6;
- *  int    keys[N] = {  1,   4,   2,   8,   5,   7};
- *  char values[N] = {'a', 'b', 'c', 'd', 'e', 'f'};
- *  thrust::stable_sort_by_key(thrust::host, keys, keys + N, values);
- *  // keys is now   {  1,   2,   4,   5,   7,   8}
- *  // values is now {'a', 'c', 'b', 'e', 'f', 'd'}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/sort.html
- *  \see \p sort_by_key
- *  \see \p stable_sort
- */
-template<typename DerivedPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2>
-  void stable_sort_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                          RandomAccessIterator1 keys_first,
-                          RandomAccessIterator1 keys_last,
-                          RandomAccessIterator2 values_first);
-
-
-/*! \p stable_sort_by_key performs a key-value sort. That is, \p stable_sort_by_key
- *  sorts the elements in <tt>[keys_first, keys_last)</tt> and <tt>[values_first,
- *  values_first + (keys_last - keys_first))</tt> into ascending key order,
- *  meaning that if \c i and \c j are any two valid iterators in <tt>[keys_first,
- *  keys_last)</tt> such that \c i precedes \c j, and \c p and \c q are iterators
- *  in <tt>[values_first, values_first + (keys_last - keys_first))</tt>
- *  corresponding to \c i and \c j respectively, then \c *j is not less than
- *  \c *i.
- *
- *  As the name suggests, \p stable_sort_by_key is stable: it preserves the
- *  relative ordering of equivalent elements. That is, if \c x and \c y
- *  are elements in <tt>[keys_first, keys_last)</tt> such that \c x precedes \c y,
- *  and if the two elements are equivalent (neither <tt>x < y</tt> nor
- *  <tt>y < x</tt>) then a postcondition of \p stable_sort_by_key is that \c x
- *  still precedes \c y.
- *
- *  This version of \p stable_sort_by_key compares key objects using \c operator<.
- *
- *  \param keys_first The beginning of the key sequence.
- *  \param keys_last The end of the key sequence.
- *  \param values_first The beginning of the value sequence.
- *
- *  \tparam RandomAccessIterator1 is a model of <a href="http://www.sgi.com/tech/stl/RandomAccessIterator.html">Random Access Iterator</a>,
- *          \p RandomAccessIterator1 is mutable,
- *          and \p RandomAccessIterator1's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThan Comparable</a>,
- *          and the ordering relation on \p RandomAccessIterator1's \c value_type is a <em>strict weak ordering</em>, as defined in the
- *          <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThan Comparable</a> requirements.
- *  \tparam RandomAccessIterator2 is a model of <a href="http://www.sgi.com/tech/stl/RandomAccessIterator.hml">Random Access Iterator</a>,
- *          and \p RandomAccessIterator2 is mutable.
- *
- *  \pre The range <tt>[keys_first, keys_last))</tt> shall not overlap the range <tt>[values_first, values_first + (keys_last - keys_first))</tt>.
- *
- *  The following code snippet demonstrates how to use \p stable_sort_by_key to sort
- *  an array of characters using integers as sorting keys.
- *
- *  \code
- *  #include <thrust/sort.h>
- *  ...
- *  const int N = 6;
- *  int    keys[N] = {  1,   4,   2,   8,   5,   7};
- *  char values[N] = {'a', 'b', 'c', 'd', 'e', 'f'};
- *  thrust::stable_sort_by_key(keys, keys + N, values);
- *  // keys is now   {  1,   2,   4,   5,   7,   8}
- *  // values is now {'a', 'c', 'b', 'e', 'f', 'd'}
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/sort.html
- *  \see \p sort_by_key
- *  \see \p stable_sort
- */
-template<typename RandomAccessIterator1,
-         typename RandomAccessIterator2>
-  void stable_sort_by_key(RandomAccessIterator1 keys_first,
-                          RandomAccessIterator1 keys_last,
-                          RandomAccessIterator2 values_first);
-
-
-/*! \p stable_sort_by_key performs a key-value sort. That is, \p stable_sort_by_key
- *  sorts the elements in <tt>[keys_first, keys_last)</tt> and <tt>[values_first,
- *  values_first + (keys_last - keys_first))</tt> into ascending key order,
- *  meaning that if \c i and \c j are any two valid iterators in <tt>[keys_first,
- *  keys_last)</tt> such that \c i precedes \c j, and \c p and \c q are iterators
- *  in <tt>[values_first, values_first + (keys_last - keys_first))</tt>
- *  corresponding to \c i and \c j respectively, then \c *j is not less than
- *  \c *i.
- *
- *  As the name suggests, \p stable_sort_by_key is stable: it preserves the
- *  relative ordering of equivalent elements. That is, if \c x and \c y
- *  are elements in <tt>[keys_first, keys_last)</tt> such that \c x precedes \c y,
- *  and if the two elements are equivalent (neither <tt>x < y</tt> nor
- *  <tt>y < x</tt>) then a postcondition of \p stable_sort_by_key is that \c x
- *  still precedes \c y.
- *
- *  This version of \p stable_sort_by_key compares key objects using the function
- *  object \p comp.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param keys_first The beginning of the key sequence.
- *  \param keys_last The end of the key sequence.
- *  \param values_first The beginning of the value sequence.
- *  \param comp Comparison operator.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam RandomAccessIterator1 is a model of <a href="http://www.sgi.com/tech/stl/RandomAccessIterator.html">Random Access Iterator</a>,
- *          \p RandomAccessIterator1 is mutable,
- *          and \p RandomAccessIterator1's \c value_type is convertible to \p StrictWeakOrdering's
- *          \c first_argument_type and \c second_argument_type.
- *  \tparam RandomAccessIterator2 is a model of <a href="http://www.sgi.com/tech/stl/RandomAccessIterator.hml">Random Access Iterator</a>,
- *          and \p RandomAccessIterator2 is mutable.
- *  \tparam StrictWeakOrdering is a model of <a href="http://www.sgi.com/tech/stl/StrictWeakOrdering.html">Strict Weak Ordering</a>.
- *
- *  \pre The range <tt>[keys_first, keys_last))</tt> shall not overlap the range <tt>[values_first, values_first + (keys_last - keys_first))</tt>.
- *
- *  The following code snippet demonstrates how to use \p sort_by_key to sort
- *  an array of character values using integers as sorting keys using the \p thrust::host execution policy for
- *  parallelization. The keys are sorted in descending order using the <tt>greater<int></tt> comparison operator.
- *
- *  \code
- *  #include <thrust/sort.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  const int N = 6;
- *  int    keys[N] = {  1,   4,   2,   8,   5,   7};
- *  char values[N] = {'a', 'b', 'c', 'd', 'e', 'f'};
- *  thrust::stable_sort_by_key(thrust::host, keys, keys + N, values, thrust::greater<int>());
- *  // keys is now   {  8,   7,   5,   4,   2,   1}
- *  // values is now {'d', 'f', 'e', 'b', 'c', 'a'}
- *  \endcode
- *
- *
- *  \see http://www.sgi.com/tech/stl/sort.html
- *  \see \p sort_by_key
- *  \see \p stable_sort
- */
-template<typename DerivedPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename StrictWeakOrdering>
-  void stable_sort_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                          RandomAccessIterator1 keys_first,
-                          RandomAccessIterator1 keys_last,
-                          RandomAccessIterator2 values_first,
-                          StrictWeakOrdering comp);
-
-
-/*! \p stable_sort_by_key performs a key-value sort. That is, \p stable_sort_by_key
- *  sorts the elements in <tt>[keys_first, keys_last)</tt> and <tt>[values_first,
- *  values_first + (keys_last - keys_first))</tt> into ascending key order,
- *  meaning that if \c i and \c j are any two valid iterators in <tt>[keys_first,
- *  keys_last)</tt> such that \c i precedes \c j, and \c p and \c q are iterators
- *  in <tt>[values_first, values_first + (keys_last - keys_first))</tt>
- *  corresponding to \c i and \c j respectively, then \c *j is not less than
- *  \c *i.
- *
- *  As the name suggests, \p stable_sort_by_key is stable: it preserves the
- *  relative ordering of equivalent elements. That is, if \c x and \c y
- *  are elements in <tt>[keys_first, keys_last)</tt> such that \c x precedes \c y,
- *  and if the two elements are equivalent (neither <tt>x < y</tt> nor
- *  <tt>y < x</tt>) then a postcondition of \p stable_sort_by_key is that \c x
- *  still precedes \c y.
- *
- *  This version of \p stable_sort_by_key compares key objects using the function
- *  object \p comp.
- *
- *  \param keys_first The beginning of the key sequence.
- *  \param keys_last The end of the key sequence.
- *  \param values_first The beginning of the value sequence.
- *  \param comp Comparison operator.
- *
- *  \tparam RandomAccessIterator1 is a model of <a href="http://www.sgi.com/tech/stl/RandomAccessIterator.html">Random Access Iterator</a>,
- *          \p RandomAccessIterator1 is mutable,
- *          and \p RandomAccessIterator1's \c value_type is convertible to \p StrictWeakOrdering's
- *          \c first_argument_type and \c second_argument_type.
- *  \tparam RandomAccessIterator2 is a model of <a href="http://www.sgi.com/tech/stl/RandomAccessIterator.hml">Random Access Iterator</a>,
- *          and \p RandomAccessIterator2 is mutable.
- *  \tparam StrictWeakOrdering is a model of <a href="http://www.sgi.com/tech/stl/StrictWeakOrdering.html">Strict Weak Ordering</a>.
- *
- *  \pre The range <tt>[keys_first, keys_last))</tt> shall not overlap the range <tt>[values_first, values_first + (keys_last - keys_first))</tt>.
- *
- *  The following code snippet demonstrates how to use \p sort_by_key to sort
- *  an array of character values using integers as sorting keys.  The keys
- *  are sorted in descending order using the greater<int> comparison operator.
- *
- *  \code
- *  #include <thrust/sort.h>
- *  ...
- *  const int N = 6;
- *  int    keys[N] = {  1,   4,   2,   8,   5,   7};
- *  char values[N] = {'a', 'b', 'c', 'd', 'e', 'f'};
- *  thrust::stable_sort_by_key(keys, keys + N, values, thrust::greater<int>());
- *  // keys is now   {  8,   7,   5,   4,   2,   1}
- *  // values is now {'d', 'f', 'e', 'b', 'c', 'a'}
- *  \endcode
- *
- *
- *  \see http://www.sgi.com/tech/stl/sort.html
- *  \see \p sort_by_key
- *  \see \p stable_sort
- */
-template<typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename StrictWeakOrdering>
-  void stable_sort_by_key(RandomAccessIterator1 keys_first,
-                          RandomAccessIterator1 keys_last,
-                          RandomAccessIterator2 values_first,
-                          StrictWeakOrdering comp);
-
-
-/*! \} // end sorting
- */
-
-
-/*! \addtogroup reductions
- *  \{
- *  \addtogroup predicates
- *  \{
- */
-
-
-/*! \p is_sorted returns \c true if the range <tt>[first, last)</tt> is
- *  sorted in ascending order, and \c false otherwise.
- *
- *  Specifically, this version of \p is_sorted returns \c false if for
- *  some iterator \c i in the range <tt>[first, last - 1)</tt> the
- *  expression <tt>*(i + 1) < *i</tt> is \c true.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the sequence.
- *  \param last  The end of the sequence.
- *  \return \c true, if the sequence is sorted; \c false, otherwise.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          \p ForwardIterator's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThan Comparable</a>,
- *          and the ordering on objects of \p ForwardIterator's \c value_type is a <em>strict weak ordering</em>, as defined
- *          in the <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThan Comparable</a> requirements.
- *
- *
- *  The following code demonstrates how to use \p is_sorted to test whether the
- *  contents of a \c device_vector are stored in ascending order using the \p thrust::device execution policy
- *  for parallelization:
- *
- *  \code
- *  #include <thrust/sort.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/sort.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  thrust::device_vector<int> v(6);
- *  v[0] = 1;
- *  v[1] = 4;
- *  v[2] = 2;
- *  v[3] = 8;
- *  v[4] = 5;
- *  v[5] = 7;
- *
- *  bool result = thrust::is_sorted(thrust::device, v.begin(), v.end());
- *
- *  // result == false
- *
- *  thrust::sort(v.begin(), v.end());
- *  result = thrust::is_sorted(thrust::device, v.begin(), v.end());
- *
- *  // result == true
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/is_sorted.html
- *  \see is_sorted_until
- *  \see \c sort
- *  \see \c stable_sort
- *  \see \c less<T>
- */
-template<typename DerivedPolicy, typename ForwardIterator>
-  bool is_sorted(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                 ForwardIterator first,
-                 ForwardIterator last);
-
-
-/*! \p is_sorted returns \c true if the range <tt>[first, last)</tt> is
- *  sorted in ascending order, and \c false otherwise.
- *
- *  Specifically, this version of \p is_sorted returns \c false if for
- *  some iterator \c i in the range <tt>[first, last - 1)</tt> the
- *  expression <tt>*(i + 1) < *i</tt> is \c true.
- *
- *  \param first The beginning of the sequence.
- *  \param last  The end of the sequence.
- *  \return \c true, if the sequence is sorted; \c false, otherwise.
- *
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          \p ForwardIterator's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThan Comparable</a>,
- *          and the ordering on objects of \p ForwardIterator's \c value_type is a <em>strict weak ordering</em>, as defined
- *          in the <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThan Comparable</a> requirements.
- *
- *
- *  The following code demonstrates how to use \p is_sorted to test whether the
- *  contents of a \c device_vector are stored in ascending order.
- *
- *  \code
- *  #include <thrust/sort.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/sort.h>
- *  ...
- *  thrust::device_vector<int> v(6);
- *  v[0] = 1;
- *  v[1] = 4;
- *  v[2] = 2;
- *  v[3] = 8;
- *  v[4] = 5;
- *  v[5] = 7;
- *
- *  bool result = thrust::is_sorted(v.begin(), v.end());
- *
- *  // result == false
- *
- *  thrust::sort(v.begin(), v.end());
- *  result = thrust::is_sorted(v.begin(), v.end());
- *
- *  // result == true
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/is_sorted.html
- *  \see is_sorted_until
- *  \see \c sort
- *  \see \c stable_sort
- *  \see \c less<T>
- */
-template<typename ForwardIterator>
-  bool is_sorted(ForwardIterator first,
-                 ForwardIterator last);
-
-
-/*! \p is_sorted returns \c true if the range <tt>[first, last)</tt> is sorted in ascending 
- *  order accoring to a user-defined comparison operation, and \c false otherwise.
- *
- *  Specifically, this version of \p is_sorted returns \c false if for some iterator \c i in
- *  the range <tt>[first, last - 1)</tt> the expression <tt>comp(*(i + 1), *i)</tt> is \c true.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the sequence.
- *  \param last  The end of the sequence.
- *  \param comp  Comparison operator.
- *  \return \c true, if the sequence is sorted according to comp; \c false, otherwise.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \p ForwardIterator's \c value_type is convertible to both \c StrictWeakOrdering's \c first_argument_type
- *          and \c second_argument_type.
- *  \tparam Compare is a model of <a href="http://www.sgi.com/tech/stl/StrictWeakOrdering.html">Strict Weak Ordering</a>.
- *
- *  The following code snippet demonstrates how to use \p is_sorted to test whether the
- *  contents of a \c device_vector are stored in descending order using the \p thrust::device execution
- *  policy for parallelization:
- *
- *  \code
- *  #include <thrust/sort.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  thrust::device_vector<int> v(6);
- *  v[0] = 1;
- *  v[1] = 4;
- *  v[2] = 2;
- *  v[3] = 8;
- *  v[4] = 5;
- *  v[5] = 7;
- *
- *  thrust::greater<int> comp;
- *  bool result = thrust::is_sorted(thrust::device, v.begin(), v.end(), comp);
- *
- *  // result == false
- *
- *  thrust::sort(v.begin(), v.end(), comp);
- *  result = thrust::is_sorted(thrust::device, v.begin(), v.end(), comp);
- *
- *  // result == true
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/is_sorted.html
- *  \see \c sort
- *  \see \c stable_sort
- *  \see \c less<T>
- */
-template<typename DerivedPolicy, typename ForwardIterator, typename Compare>
-  bool is_sorted(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                 ForwardIterator first,
-                 ForwardIterator last,
-                 Compare comp);
-
-
-/*! \p is_sorted returns \c true if the range <tt>[first, last)</tt> is sorted in ascending 
- *  order accoring to a user-defined comparison operation, and \c false otherwise.
- *
- *  Specifically, this version of \p is_sorted returns \c false if for some iterator \c i in
- *  the range <tt>[first, last - 1)</tt> the expression <tt>comp(*(i + 1), *i)</tt> is \c true.
- *
- *  \param first The beginning of the sequence.
- *  \param last  The end of the sequence.
- *  \param comp  Comparison operator.
- *  \return \c true, if the sequence is sorted according to comp; \c false, otherwise.
- *
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \p ForwardIterator's \c value_type is convertible to both \c StrictWeakOrdering's \c first_argument_type
- *          and \c second_argument_type.
- *  \tparam Compare is a model of <a href="http://www.sgi.com/tech/stl/StrictWeakOrdering.html">Strict Weak Ordering</a>.
- *
- *  The following code snippet demonstrates how to use \p is_sorted to test whether the
- *  contents of a \c device_vector are stored in descending order.
- *
- *  \code
- *  #include <thrust/sort.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/device_vector.h>
- *  ...
- *  thrust::device_vector<int> v(6);
- *  v[0] = 1;
- *  v[1] = 4;
- *  v[2] = 2;
- *  v[3] = 8;
- *  v[4] = 5;
- *  v[5] = 7;
- *
- *  thrust::greater<int> comp;
- *  bool result = thrust::is_sorted(v.begin(), v.end(), comp);
- *
- *  // result == false
- *
- *  thrust::sort(v.begin(), v.end(), comp);
- *  result = thrust::is_sorted(v.begin(), v.end(), comp);
- *
- *  // result == true
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/is_sorted.html
- *  \see \c sort
- *  \see \c stable_sort
- *  \see \c less<T>
- */
-template<typename ForwardIterator, typename Compare>
-  bool is_sorted(ForwardIterator first,
-                 ForwardIterator last,
-                 Compare comp);
-
-
-/*! This version of \p is_sorted_until returns the last iterator \c i in <tt>[first,last]</tt> for
- *  which the range <tt>[first,last)</tt> is sorted using \c operator<. If <tt>distance(first,last) < 2</tt>,
- *  \p is_sorted_until simply returns \p last.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the range of interest.
- *  \param last The end of the range of interest.
- *  \return The last iterator in the input range for which it is sorted.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a> and
- *          \p ForwardIterator's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThan Comparable</a>.
- *
- *  The following code snippet demonstrates how to use \p is_sorted_until to find the first position
- *  in an array where the data becomes unsorted using the \p thrust::host execution policy for
- *  parallelization:
- *  
- *  \code
- *  #include <thrust/sort.h>
- *  #include <thrust/execution_policy.h>
- *
- *  ...
- *   
- *  int A[8] = {0, 1, 2, 3, 0, 1, 2, 3};
- *  
- *  int * B = thrust::is_sorted_until(thrust::host, A, A + 8);
- *  
- *  // B - A is 4
- *  // [A, B) is sorted
- *  \endcode
- *
- *  \see \p is_sorted
- *  \see \p sort
- *  \see \p sort_by_key
- *  \see \p stable_sort
- *  \see \p stable_sort_by_key
- */
-template<typename DerivedPolicy, typename ForwardIterator>
-  ForwardIterator is_sorted_until(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                  ForwardIterator first,
-                                  ForwardIterator last);
-
-
-/*! This version of \p is_sorted_until returns the last iterator \c i in <tt>[first,last]</tt> for
- *  which the range <tt>[first,last)</tt> is sorted using \c operator<. If <tt>distance(first,last) < 2</tt>,
- *  \p is_sorted_until simply returns \p last.
- *
- *  \param first The beginning of the range of interest.
- *  \param last The end of the range of interest.
- *  \return The last iterator in the input range for which it is sorted.
- *
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a> and
- *          \p ForwardIterator's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/LessThanComparable.html">LessThan Comparable</a>.
- *
- *  The following code snippet demonstrates how to use \p is_sorted_until to find the first position
- *  in an array where the data becomes unsorted:
- *  
- *  \code
- *  #include <thrust/sort.h>
- *
- *  ...
- *   
- *  int A[8] = {0, 1, 2, 3, 0, 1, 2, 3};
- *  
- *  int * B = thrust::is_sorted_until(A, A + 8);
- *  
- *  // B - A is 4
- *  // [A, B) is sorted
- *  \endcode
- *
- *  \see \p is_sorted
- *  \see \p sort
- *  \see \p sort_by_key
- *  \see \p stable_sort
- *  \see \p stable_sort_by_key
- */
-template<typename ForwardIterator>
-  ForwardIterator is_sorted_until(ForwardIterator first,
-                                  ForwardIterator last);
-
-
-/*! This version of \p is_sorted_until returns the last iterator \c i in <tt>[first,last]</tt> for
- *  which the range <tt>[first,last)</tt> is sorted using the function object \c comp. If <tt>distance(first,last) < 2</tt>,
- *  \p is_sorted_until simply returns \p last.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization:
- *  \param first The beginning of the range of interest.
- *  \param last The end of the range of interest.
- *  \param comp The function object to use for comparison.
- *  \return The last iterator in the input range for which it is sorted.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a> and
- *          \p ForwardIterator's \c value_type is convertible to \p Compare's \c argument_type.
- *  \tparam Compare is a model of <a href="http://www.sgi.com/tech/stl/StrictWeakOrdering.html">Strict Weak Ordering</a>.
- *
- *  The following code snippet demonstrates how to use \p is_sorted_until to find the first position
- *  in an array where the data becomes unsorted in descending order using the \p thrust::host execution
- *  policy for parallelization:
- *
- *  \code
- *  #include <thrust/sort.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/execution_policy.h>
- *
- *  ...
- *   
- *  int A[8] = {3, 2, 1, 0, 3, 2, 1, 0};
- *  
- *  thrust::greater<int> comp;
- *  int * B = thrust::is_sorted_until(thrust::host, A, A + 8, comp);
- *  
- *  // B - A is 4
- *  // [A, B) is sorted in descending order
- *  \endcode
- *
- *  \see \p is_sorted
- *  \see \p sort
- *  \see \p sort_by_key
- *  \see \p stable_sort
- *  \see \p stable_sort_by_key
- */
-template<typename DerivedPolicy, typename ForwardIterator, typename Compare>
-  ForwardIterator is_sorted_until(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                  ForwardIterator first,
-                                  ForwardIterator last,
-                                  Compare comp);
-
-
-/*! This version of \p is_sorted_until returns the last iterator \c i in <tt>[first,last]</tt> for
- *  which the range <tt>[first,last)</tt> is sorted using the function object \c comp. If <tt>distance(first,last) < 2</tt>,
- *  \p is_sorted_until simply returns \p last.
- *
- *  \param first The beginning of the range of interest.
- *  \param last The end of the range of interest.
- *  \param comp The function object to use for comparison.
- *  \return The last iterator in the input range for which it is sorted.
- *
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a> and
- *          \p ForwardIterator's \c value_type is convertible to \p Compare's \c argument_type.
- *  \tparam Compare is a model of <a href="http://www.sgi.com/tech/stl/StrictWeakOrdering.html">Strict Weak Ordering</a>.
- *
- *  The following code snippet demonstrates how to use \p is_sorted_until to find the first position
- *  in an array where the data becomes unsorted in descending order:
- *
- *  \code
- *  #include <thrust/sort.h>
- *  #include <thrust/functional.h>
- *
- *  ...
- *   
- *  int A[8] = {3, 2, 1, 0, 3, 2, 1, 0};
- *  
- *  thrust::greater<int> comp;
- *  int * B = thrust::is_sorted_until(A, A + 8, comp);
- *  
- *  // B - A is 4
- *  // [A, B) is sorted in descending order
- *  \endcode
- *
- *  \see \p is_sorted
- *  \see \p sort
- *  \see \p sort_by_key
- *  \see \p stable_sort
- *  \see \p stable_sort_by_key
- */
-template<typename ForwardIterator, typename Compare>
-  ForwardIterator is_sorted_until(ForwardIterator first,
-                                  ForwardIterator last,
-                                  Compare comp);
-
-
-/*! \} // end predicates
- *  \} // end reductions
- */
-
-
-} // end namespace thrust
-
-#include <thrust/detail/sort.inl>
-
diff --git a/compat/thrust/swap.h b/compat/thrust/swap.h
deleted file mode 100644
index 085e546930..0000000000
--- a/compat/thrust/swap.h
+++ /dev/null
@@ -1,190 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-/*! \file swap.h
- *  \brief Functions for swapping the value of elements
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/execution_policy.h>
-
-// empty Doxygen comment below so namespace thrust's documentation will be extracted
-
-/*!
- */
-namespace thrust
-{
-
-/*! \addtogroup utility
- *  \{
- */
-
-/*! \addtogroup swap
- *  \{
- */
-
-/*! \p swap assigns the contents of \c a to \c b and the
- *  contents of \c b to \c a. This is used as a primitive operation
- *  by many other algorithms.
- *  
- *  \param a The first value of interest. After completion,
- *           the value of b will be returned here.
- *  \param b The second value of interest. After completion,
- *           the value of a will be returned here.
- *
- *  \tparam Assignable is a model of <a href="http://www.sgi.com/tech/stl/Assignable.html">Assignable</a>.
- *
- *  The following code snippet demonstrates how to use \p swap to
- *  swap the contents of two variables.
- *
- *  \code
- *  #include <thrust/swap.h>
- *  ...
- *  int x = 1;
- *  int y = 2;
- *  thrust::swap(x,h);
- *
- *  // x == 2, y == 1
- *  \endcode
- */
-template<typename Assignable1, typename Assignable2>
-__host__ __device__ 
-inline void swap(Assignable1 &a, Assignable2 &b);
-
-/*! \} // swap
- */
-
-/*! \} // utility
- */
-
-
-/*! \addtogroup copying
- *  \{
- */
-
-
-/*! \p swap_ranges swaps each of the elements in the range <tt>[first1, last1)</tt>
- *  with the corresponding element in the range <tt>[first2, first2 + (last1 - first1))</tt>.
- *  That is, for each integer \c n such that <tt>0 <= n < (last1 - first1)</tt>, it swaps
- *  <tt>*(first1 + n)</tt> and <tt>*(first2 + n)</tt>. The return value is
- *  <tt>first2 + (last1 - first1)</tt>.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first1 The beginning of the first sequence to swap.
- *  \param last1 One position past the last element of the first sequence to swap.
- *  \param first2 The beginning of the second sequence to swap.
- *  \return An iterator pointing to one position past the last element of the second
- *          sequence to swap.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam ForwardIterator1 is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \p ForwardIterator1's \c value_type must be convertible to \p ForwardIterator2's \c value_type.
- *  \tparam ForwardIterator2 is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \p ForwardIterator2's \c value_type must be convertible to \p ForwardIterator1's \c value_type.
- *
- *  \pre \p first1 may equal \p first2, but the range <tt>[first1, last1)</tt> shall not overlap the range <tt>[first2, first2 + (last1 - first1))</tt> otherwise.
- *
- *  The following code snippet demonstrates how to use \p swap_ranges to
- *  swap the contents of two \c thrust::device_vectors using the \p thrust::device execution
- *  policy for parallelization:
- *
- *  \code
- *  #include <thrust/swap.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  thrust::device_vector<int> v1(2), v2(2);
- *  v1[0] = 1;
- *  v1[1] = 2;
- *  v2[0] = 3;
- *  v2[1] = 4;
- *
- *  thrust::swap_ranges(thrust::device, v1.begin(), v1.end(), v2.begin());
- *
- *  // v1[0] == 3, v1[1] == 4, v2[0] == 1, v2[1] == 2
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/swap_ranges.html
- *  \see \c swap
- */
-template<typename DerivedPolicy,
-         typename ForwardIterator1,
-         typename ForwardIterator2>
-  ForwardIterator2 swap_ranges(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                               ForwardIterator1 first1,
-                               ForwardIterator1 last1,
-                               ForwardIterator2 first2);
-
-
-/*! \p swap_ranges swaps each of the elements in the range <tt>[first1, last1)</tt>
- *  with the corresponding element in the range <tt>[first2, first2 + (last1 - first1))</tt>.
- *  That is, for each integer \c n such that <tt>0 <= n < (last1 - first1)</tt>, it swaps
- *  <tt>*(first1 + n)</tt> and <tt>*(first2 + n)</tt>. The return value is
- *  <tt>first2 + (last1 - first1)</tt>.
- *
- *  \param first1 The beginning of the first sequence to swap.
- *  \param last1 One position past the last element of the first sequence to swap.
- *  \param first2 The beginning of the second sequence to swap.
- *  \return An iterator pointing to one position past the last element of the second
- *          sequence to swap.
- *
- *  \tparam ForwardIterator1 is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \p ForwardIterator1's \c value_type must be convertible to \p ForwardIterator2's \c value_type.
- *  \tparam ForwardIterator2 is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \p ForwardIterator2's \c value_type must be convertible to \p ForwardIterator1's \c value_type.
- *
- *  \pre \p first1 may equal \p first2, but the range <tt>[first1, last1)</tt> shall not overlap the range <tt>[first2, first2 + (last1 - first1))</tt> otherwise.
- *
- *  The following code snippet demonstrates how to use \p swap_ranges to
- *  swap the contents of two \c thrust::device_vectors.
- *
- *  \code
- *  #include <thrust/swap.h>
- *  #include <thrust/device_vector.h>
- *  ...
- *  thrust::device_vector<int> v1(2), v2(2);
- *  v1[0] = 1;
- *  v1[1] = 2;
- *  v2[0] = 3;
- *  v2[1] = 4;
- *
- *  thrust::swap_ranges(v1.begin(), v1.end(), v2.begin());
- *
- *  // v1[0] == 3, v1[1] == 4, v2[0] == 1, v2[1] == 2
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/swap_ranges.html
- *  \see \c swap
- */
-template<typename ForwardIterator1,
-         typename ForwardIterator2>
-  ForwardIterator2 swap_ranges(ForwardIterator1 first1,
-                               ForwardIterator1 last1,
-                               ForwardIterator2 first2);
-
-
-/*! \} // copying
- */
-
-
-} // end thrust
-
-#include <thrust/detail/swap.inl>
-
diff --git a/compat/thrust/system/cpp/detail/adjacent_difference.h b/compat/thrust/system/cpp/detail/adjacent_difference.h
deleted file mode 100644
index ea212ffcd9..0000000000
--- a/compat/thrust/system/cpp/detail/adjacent_difference.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file adjacent_difference.h
- *  \brief C++ implementation of adjacent_difference.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cpp/detail/execution_policy.h>
-#include <thrust/system/detail/internal/scalar/adjacent_difference.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cpp
-{
-namespace detail
-{
-
-template <typename DerivedPolicy,
-          typename InputIterator,
-          typename OutputIterator,
-          typename BinaryFunction>
-OutputIterator adjacent_difference(execution_policy<DerivedPolicy> &,
-                                   InputIterator first,
-                                   InputIterator last,
-                                   OutputIterator result,
-                                   BinaryFunction binary_op)
-{
-  return thrust::system::detail::internal::scalar::adjacent_difference(first, last, result, binary_op);
-}
-
-} // end namespace detail
-} // end namespace cpp
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cpp/detail/assign_value.h b/compat/thrust/system/cpp/detail/assign_value.h
deleted file mode 100644
index 847fc97afd..0000000000
--- a/compat/thrust/system/cpp/detail/assign_value.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/system/cpp/detail/execution_policy.h>
-#include <thrust/detail/raw_pointer_cast.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cpp
-{
-namespace detail
-{
-
-template<typename DerivedPolicy, typename Pointer1, typename Pointer2>
-__host__ __device__
-  void assign_value(thrust::system::cpp::detail::execution_policy<DerivedPolicy> &, Pointer1 dst, Pointer2 src)
-{
-  *thrust::raw_pointer_cast(dst) = *thrust::raw_pointer_cast(src);
-} // end assign_value()
-
-} // end detail
-} // end cpp
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/cpp/detail/binary_search.h b/compat/thrust/system/cpp/detail/binary_search.h
deleted file mode 100644
index 37af539e0d..0000000000
--- a/compat/thrust/system/cpp/detail/binary_search.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file binary_search.h
- *  \brief C++ implementation of binary search algorithms.
- */
-
-#pragma once
-
-#include <thrust/system/cpp/detail/execution_policy.h>
-#include <thrust/system/detail/internal/scalar/binary_search.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cpp
-{
-namespace detail
-{
-
-template <typename ForwardIterator,
-          typename T,
-          typename StrictWeakOrdering>
-ForwardIterator lower_bound(tag,
-                            ForwardIterator first,
-                            ForwardIterator last,
-                            const T& val,
-                            StrictWeakOrdering comp)
-{
-  return thrust::system::detail::internal::scalar::lower_bound(first, last, val, comp);
-}
-
-
-template <typename ForwardIterator,
-          typename T,
-          typename StrictWeakOrdering>
-ForwardIterator upper_bound(tag,
-                            ForwardIterator first,
-                            ForwardIterator last,
-                            const T& val, 
-                            StrictWeakOrdering comp)
-{
-  return thrust::system::detail::internal::scalar::upper_bound(first, last, val, comp);
-}
-
-template <typename ForwardIterator,
-          typename T,
-          typename StrictWeakOrdering>
-bool binary_search(tag,
-                   ForwardIterator first,
-                   ForwardIterator last,
-                   const T& val, 
-                   StrictWeakOrdering comp)
-{
-  return thrust::system::detail::internal::scalar::binary_search(first, last, val, comp);
-}
-
-} // end namespace detail
-} // end namespace cpp
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cpp/detail/copy.h b/compat/thrust/system/cpp/detail/copy.h
deleted file mode 100644
index 7299bbbd7a..0000000000
--- a/compat/thrust/system/cpp/detail/copy.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-/*! \file copy.h
- *  \brief C++ implementations of copy functions.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cpp/detail/execution_policy.h>
-#include <thrust/system/detail/internal/scalar/copy.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cpp
-{
-namespace detail
-{
-
-template<typename InputIterator,
-         typename OutputIterator>
-  OutputIterator copy(tag,
-                      InputIterator first,
-                      InputIterator last,
-                      OutputIterator result)
-{
-  return thrust::system::detail::internal::scalar::copy(first, last, result);
-}
-
-template<typename InputIterator,
-         typename Size,
-         typename OutputIterator>
-  OutputIterator copy_n(tag,
-                        InputIterator first,
-                        Size n,
-                        OutputIterator result)
-{
-  return thrust::system::detail::internal::scalar::copy_n(first, n, result);
-}
-
-} // end namespace detail
-} // end namespace cpp
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cpp/detail/copy_if.h b/compat/thrust/system/cpp/detail/copy_if.h
deleted file mode 100644
index 2faadfa1b7..0000000000
--- a/compat/thrust/system/cpp/detail/copy_if.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cpp/detail/execution_policy.h>
-#include <thrust/system/detail/internal/scalar/copy_if.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cpp
-{
-namespace detail
-{
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename Predicate>
-  OutputIterator copy_if(tag,
-                         InputIterator1 first,
-                         InputIterator1 last,
-                         InputIterator2 stencil,
-                         OutputIterator result,
-                         Predicate pred)
-{
-  return thrust::system::detail::internal::scalar::copy_if(first, last, stencil, result, pred);
-}
-
-} // end detail
-} // end cpp
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/cpp/detail/count.h b/compat/thrust/system/cpp/detail/count.h
deleted file mode 100644
index a307fc5fc8..0000000000
--- a/compat/thrust/system/cpp/detail/count.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system has no special version of this algorithm 
-
diff --git a/compat/thrust/system/cpp/detail/dispatch/sort.h b/compat/thrust/system/cpp/detail/dispatch/sort.h
deleted file mode 100644
index 2a03cf62bc..0000000000
--- a/compat/thrust/system/cpp/detail/dispatch/sort.h
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/reverse.h>
-
-#include <thrust/detail/type_traits.h>
-#include <thrust/iterator/iterator_traits.h>
-
-#include <thrust/system/detail/internal/scalar/stable_merge_sort.h>
-#include <thrust/system/detail/internal/scalar/stable_radix_sort.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cpp
-{
-namespace detail
-{
-namespace dispatch
-{
-
-////////////////
-// Radix Sort //
-////////////////
-
-template<typename RandomAccessIterator,
-         typename StrictWeakOrdering>
-void stable_sort(RandomAccessIterator first,
-                 RandomAccessIterator last,
-                 StrictWeakOrdering comp,
-                 thrust::detail::true_type)
-{
-  thrust::system::detail::internal::scalar::stable_radix_sort(first, last);
-        
-  // if comp is greater<T> then reverse the keys
-  typedef typename thrust::iterator_traits<RandomAccessIterator>::value_type KeyType;
-  const static bool reverse = thrust::detail::is_same<StrictWeakOrdering, typename thrust::greater<KeyType> >::value;
-
-  if (reverse)
-    thrust::reverse(first, last);
-}
-
-template<typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename StrictWeakOrdering>
-void stable_sort_by_key(RandomAccessIterator1 first1,
-                        RandomAccessIterator1 last1,
-                        RandomAccessIterator2 first2,
-                        StrictWeakOrdering comp,
-                        thrust::detail::true_type)
-{
-  // if comp is greater<T> then reverse the keys and values
-  typedef typename thrust::iterator_traits<RandomAccessIterator1>::value_type KeyType;
-  const static bool reverse = thrust::detail::is_same<StrictWeakOrdering, typename thrust::greater<KeyType> >::value;
-
-  // note, we also have to reverse the (unordered) input to preserve stability
-  if (reverse)
-  {
-    thrust::reverse(first1,  last1);
-    thrust::reverse(first2, first2 + (last1 - first1));
-  }
-
-  thrust::system::detail::internal::scalar::stable_radix_sort_by_key(first1, last1, first2);
-
-  if (reverse)
-  {
-    thrust::reverse(first1,  last1);
-    thrust::reverse(first2, first2 + (last1 - first1));
-  }
-}
-
-////////////////
-// Merge Sort //
-////////////////
-
-template<typename RandomAccessIterator,
-         typename StrictWeakOrdering>
-void stable_sort(RandomAccessIterator first,
-                 RandomAccessIterator last,
-                 StrictWeakOrdering comp,
-                 thrust::detail::false_type)
-{
-  thrust::system::detail::internal::scalar::stable_merge_sort(first, last, comp);
-}
-
-template<typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename StrictWeakOrdering>
-void stable_sort_by_key(RandomAccessIterator1 first1,
-                        RandomAccessIterator1 last1,
-                        RandomAccessIterator2 first2,
-                        StrictWeakOrdering comp,
-                        thrust::detail::false_type)
-{
-  thrust::system::detail::internal::scalar::stable_merge_sort_by_key(first1, last1, first2, comp);
-}
-
-} // end namespace dispatch
-} // end namespace detail
-} // end namespace cpp
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cpp/detail/equal.h b/compat/thrust/system/cpp/detail/equal.h
deleted file mode 100644
index a307fc5fc8..0000000000
--- a/compat/thrust/system/cpp/detail/equal.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system has no special version of this algorithm 
-
diff --git a/compat/thrust/system/cpp/detail/execution_policy.h b/compat/thrust/system/cpp/detail/execution_policy.h
deleted file mode 100644
index 229ff5c6c8..0000000000
--- a/compat/thrust/system/cpp/detail/execution_policy.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/execution_policy.h>
-
-namespace thrust
-{
-namespace system
-{
-// put the canonical tag in the same ns as the backend's entry points
-namespace cpp
-{
-namespace detail
-{
-
-// this awkward sequence of definitions arise
-// from the desire both for tag to derive
-// from execution_policy and for execution_policy
-// to convert to tag (when execution_policy is not
-// an ancestor of tag)
-
-// forward declaration of tag
-struct tag;
-
-// forward declaration of execution_policy
-template<typename> struct execution_policy;
-
-// specialize execution_policy for tag
-template<>
-  struct execution_policy<tag>
-    : thrust::execution_policy<tag>
-{};
-
-// tag's definition comes before the
-// generic definition of execution_policy
-struct tag : execution_policy<tag> {};
-
-// allow conversion to tag when it is not a successor
-template<typename Derived>
-  struct execution_policy
-    : thrust::execution_policy<Derived>
-{
-  // allow conversion to tag
-  inline operator tag () const
-  {
-    return tag();
-  }
-};
-
-} // end detail
-
-// alias execution_policy and tag here
-using thrust::system::cpp::detail::execution_policy;
-using thrust::system::cpp::detail::tag;
-
-} // end cpp
-} // end system
-
-// alias items at top-level
-namespace cpp
-{
-
-using thrust::system::cpp::execution_policy;
-using thrust::system::cpp::tag;
-
-} // end cpp
-} // end thrust
-
diff --git a/compat/thrust/system/cpp/detail/extrema.h b/compat/thrust/system/cpp/detail/extrema.h
deleted file mode 100644
index 3eab6d406d..0000000000
--- a/compat/thrust/system/cpp/detail/extrema.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file extrema.h
- *  \brief C++ implementations of extrema functions.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/pair.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/system/cpp/detail/execution_policy.h>
-#include <thrust/system/detail/internal/scalar/extrema.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cpp
-{
-namespace detail
-{
-
-template <typename DerivedPolicy, typename ForwardIterator, typename BinaryPredicate>
-ForwardIterator min_element(execution_policy<DerivedPolicy> &,
-                            ForwardIterator first, 
-                            ForwardIterator last,
-                            BinaryPredicate comp)
-{
-  return thrust::system::detail::internal::scalar::min_element(first, last, comp);
-}
-
-
-template <typename DerivedPolicy, typename ForwardIterator, typename BinaryPredicate>
-ForwardIterator max_element(execution_policy<DerivedPolicy> &,
-                            ForwardIterator first, 
-                            ForwardIterator last,
-                            BinaryPredicate comp)
-{
-  return thrust::system::detail::internal::scalar::max_element(first, last, comp);
-}
-
-
-template <typename DerivedPolicy, typename ForwardIterator, typename BinaryPredicate>
-thrust::pair<ForwardIterator,ForwardIterator> minmax_element(execution_policy<DerivedPolicy> &,
-                                                             ForwardIterator first, 
-                                                             ForwardIterator last,
-                                                             BinaryPredicate comp)
-{
-  return thrust::system::detail::internal::scalar::minmax_element(first, last, comp);
-}
-
-} // end namespace detail
-} // end namespace cpp
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cpp/detail/fill.h b/compat/thrust/system/cpp/detail/fill.h
deleted file mode 100644
index a307fc5fc8..0000000000
--- a/compat/thrust/system/cpp/detail/fill.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system has no special version of this algorithm 
-
diff --git a/compat/thrust/system/cpp/detail/find.h b/compat/thrust/system/cpp/detail/find.h
deleted file mode 100644
index 9698524ed8..0000000000
--- a/compat/thrust/system/cpp/detail/find.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file find.h
- *  \brief C++ implementation of find_if. 
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/system/cpp/detail/execution_policy.h>
-#include <thrust/system/detail/internal/scalar/find.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cpp
-{
-namespace detail
-{
-
-template <typename InputIterator,
-          typename Predicate>
-InputIterator find_if(tag,
-                      InputIterator first,
-                      InputIterator last,
-                      Predicate pred)
-{
-  return thrust::system::detail::internal::scalar::find_if(first, last, pred);
-}
-
-} // end namespace detail
-} // end namespace cpp
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cpp/detail/for_each.h b/compat/thrust/system/cpp/detail/for_each.h
deleted file mode 100644
index 8d4e1c730b..0000000000
--- a/compat/thrust/system/cpp/detail/for_each.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cpp/detail/execution_policy.h>
-#include <thrust/system/detail/internal/scalar/for_each.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cpp
-{
-namespace detail
-{
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename UnaryFunction>
-InputIterator for_each(thrust::system::cpp::detail::execution_policy<DerivedPolicy> &,
-                       InputIterator first,
-                       InputIterator last,
-                       UnaryFunction f)
-{
-  return thrust::system::detail::internal::scalar::for_each(first, last, f);
-}
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename Size,
-         typename UnaryFunction>
-InputIterator for_each_n(thrust::system::cpp::detail::execution_policy<DerivedPolicy> &,
-                         InputIterator first,
-                         Size n,
-                         UnaryFunction f)
-{
-  return thrust::system::detail::internal::scalar::for_each_n(first, n, f);
-}
-
-} // end namespace detail
-} // end namespace cpp
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cpp/detail/gather.h b/compat/thrust/system/cpp/detail/gather.h
deleted file mode 100644
index a307fc5fc8..0000000000
--- a/compat/thrust/system/cpp/detail/gather.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system has no special version of this algorithm 
-
diff --git a/compat/thrust/system/cpp/detail/generate.h b/compat/thrust/system/cpp/detail/generate.h
deleted file mode 100644
index a307fc5fc8..0000000000
--- a/compat/thrust/system/cpp/detail/generate.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system has no special version of this algorithm 
-
diff --git a/compat/thrust/system/cpp/detail/get_value.h b/compat/thrust/system/cpp/detail/get_value.h
deleted file mode 100644
index 5ddb2c8349..0000000000
--- a/compat/thrust/system/cpp/detail/get_value.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/system/cpp/detail/execution_policy.h>
-#include <thrust/detail/raw_pointer_cast.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cpp
-{
-namespace detail
-{
-
-
-template<typename DerivedPolicy, typename Pointer>
-__host__ __device__
-  typename thrust::iterator_value<Pointer>::type
-    get_value(thrust::system::cpp::detail::execution_policy<DerivedPolicy> &, Pointer ptr)
-{
-  return *thrust::raw_pointer_cast(ptr);
-} // end get_value()
-
-
-} // end detail
-} // end cpp
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/cpp/detail/inner_product.h b/compat/thrust/system/cpp/detail/inner_product.h
deleted file mode 100644
index a307fc5fc8..0000000000
--- a/compat/thrust/system/cpp/detail/inner_product.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system has no special version of this algorithm 
-
diff --git a/compat/thrust/system/cpp/detail/iter_swap.h b/compat/thrust/system/cpp/detail/iter_swap.h
deleted file mode 100644
index 257276ffea..0000000000
--- a/compat/thrust/system/cpp/detail/iter_swap.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/system/cpp/detail/execution_policy.h>
-#include <thrust/detail/raw_pointer_cast.h>
-#include <thrust/detail/swap.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cpp
-{
-namespace detail
-{
-
-
-template<typename Pointer1, typename Pointer2>
-__host__ __device__
-  void iter_swap(tag, Pointer1 a, Pointer2 b)
-{
-  using thrust::swap;
-  swap(*thrust::raw_pointer_cast(a), *thrust::raw_pointer_cast(b));
-} // end iter_swap()
-
-
-} // end detail
-} // end cpp
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/cpp/detail/logical.h b/compat/thrust/system/cpp/detail/logical.h
deleted file mode 100644
index a307fc5fc8..0000000000
--- a/compat/thrust/system/cpp/detail/logical.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system has no special version of this algorithm 
-
diff --git a/compat/thrust/system/cpp/detail/malloc_and_free.h b/compat/thrust/system/cpp/detail/malloc_and_free.h
deleted file mode 100644
index 4f8ae82092..0000000000
--- a/compat/thrust/system/cpp/detail/malloc_and_free.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/raw_pointer_cast.h>
-#include <cstdlib> // for malloc & free
-#include <thrust/system/cpp/detail/execution_policy.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cpp
-{
-namespace detail
-{
-
-
-// note that malloc returns a raw pointer to avoid
-// depending on the heavyweight thrust/system/cpp/memory.h header
-template<typename DerivedPolicy>
-  void *malloc(execution_policy<DerivedPolicy> &, std::size_t n)
-{
-  return std::malloc(n);
-} // end malloc()
-
-
-template<typename DerivedPolicy, typename Pointer>
-  void free(execution_policy<DerivedPolicy> &, Pointer ptr)
-{
-  std::free(thrust::raw_pointer_cast(ptr));
-} // end free()
-
-
-} // end detail
-} // end cpp
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/cpp/detail/memory.inl b/compat/thrust/system/cpp/detail/memory.inl
deleted file mode 100644
index 7f9a48dee7..0000000000
--- a/compat/thrust/system/cpp/detail/memory.inl
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cpp/memory.h>
-#include <thrust/system/cpp/detail/malloc_and_free.h>
-#include <limits>
-
-namespace thrust
-{
-
-// XXX WAR an issue with MSVC 2005 (cl v14.00) incorrectly implementing
-//     pointer_raw_pointer for pointer by specializing it here
-#if (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC) && (_MSC_VER <= 1400)
-namespace detail
-{
-
-template<typename T>
-  struct pointer_raw_pointer< thrust::cpp::pointer<T> >
-{
-  typedef typename thrust::cpp::pointer<T>::raw_pointer type;
-}; // end pointer_raw_pointer
-
-} // end detail
-#endif
-
-namespace system
-{
-namespace cpp
-{
-
-
-template<typename T>
-  template<typename OtherT>
-    reference<T> &
-      reference<T>
-        ::operator=(const reference<OtherT> &other)
-{
-  return super_t::operator=(other);
-} // end reference::operator=()
-
-template<typename T>
-  reference<T> &
-    reference<T>
-      ::operator=(const value_type &x)
-{
-  return super_t::operator=(x);
-} // end reference::operator=()
-
-template<typename T>
-__host__ __device__
-void swap(reference<T> a, reference<T> b)
-{
-  a.swap(b);
-} // end swap()
-
-pointer<void> malloc(std::size_t n)
-{
-  tag t;
-  return pointer<void>(thrust::system::cpp::detail::malloc(t, n));
-} // end malloc()
-
-template<typename T>
-pointer<T> malloc(std::size_t n)
-{
-  pointer<void> raw_ptr = thrust::system::cpp::malloc(sizeof(T) * n);
-  return pointer<T>(reinterpret_cast<T*>(raw_ptr.get()));
-} // end malloc()
-
-void free(pointer<void> ptr)
-{
-  tag t;
-  return thrust::system::cpp::detail::free(t, ptr);
-} // end free()
-
-} // end cpp
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/cpp/detail/merge.h b/compat/thrust/system/cpp/detail/merge.h
deleted file mode 100644
index 7f01c0713a..0000000000
--- a/compat/thrust/system/cpp/detail/merge.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/pair.h>
-#include <thrust/system/cpp/detail/execution_policy.h>
-#include <thrust/system/detail/internal/scalar/merge.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cpp
-{
-namespace detail
-{
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename StrictWeakOrdering>
-OutputIterator merge(execution_policy<DerivedPolicy> &,
-                     InputIterator1 first1,
-                     InputIterator1 last1,
-                     InputIterator2 first2,
-                     InputIterator2 last2,
-                     OutputIterator result,
-                     StrictWeakOrdering comp)
-{
-  return thrust::system::detail::internal::scalar::merge(first1, last1, first2, last2, result, comp);
-}
-
-template <typename DerivedPolicy,
-          typename InputIterator1,
-          typename InputIterator2,
-          typename InputIterator3,
-          typename InputIterator4,
-          typename OutputIterator1,
-          typename OutputIterator2,
-          typename StrictWeakOrdering>
-thrust::pair<OutputIterator1,OutputIterator2>
-  merge_by_key(execution_policy<DerivedPolicy> &,
-               InputIterator1 keys_first1,
-               InputIterator1 keys_last1,
-               InputIterator2 keys_first2,
-               InputIterator2 keys_last2,
-               InputIterator3 values_first1,
-               InputIterator4 values_first2,
-               OutputIterator1 keys_result,
-               OutputIterator2 values_result,
-               StrictWeakOrdering comp)
-{
-  return thrust::system::detail::internal::scalar::merge_by_key(keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result, comp);
-}
-
-} // end namespace detail
-} // end namespace cpp
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cpp/detail/mismatch.h b/compat/thrust/system/cpp/detail/mismatch.h
deleted file mode 100644
index a307fc5fc8..0000000000
--- a/compat/thrust/system/cpp/detail/mismatch.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system has no special version of this algorithm 
-
diff --git a/compat/thrust/system/cpp/detail/par.h b/compat/thrust/system/cpp/detail/par.h
deleted file mode 100644
index 953e5274db..0000000000
--- a/compat/thrust/system/cpp/detail/par.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cpp/detail/execution_policy.h>
-#include <thrust/detail/execute_with_allocator.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cpp
-{
-namespace detail
-{
-
-
-struct par_t : thrust::system::cpp::detail::execution_policy<par_t>
-{
-  par_t() : thrust::system::cpp::detail::execution_policy<par_t>() {}
-
-  template<typename Allocator>
-    thrust::detail::execute_with_allocator<Allocator, thrust::system::cpp::detail::execution_policy>
-      operator()(Allocator &alloc) const
-  {
-    return thrust::detail::execute_with_allocator<Allocator, thrust::system::cpp::detail::execution_policy>(alloc);
-  }
-};
-
-
-} // end detail
-
-
-static const detail::par_t par;
-
-
-} // end cpp
-} // end system
-
-
-// alias par here
-namespace cpp
-{
-
-
-using thrust::system::cpp::par;
-
-
-} // end cpp
-} // end thrust
-
diff --git a/compat/thrust/system/cpp/detail/partition.h b/compat/thrust/system/cpp/detail/partition.h
deleted file mode 100644
index 25a4f1c346..0000000000
--- a/compat/thrust/system/cpp/detail/partition.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file partition.h
- *  \brief cpp implementations of partition functions
- */
-
-#pragma once
-
-#include <thrust/pair.h>
-#include <thrust/system/cpp/detail/execution_policy.h>
-#include <thrust/system/detail/internal/scalar/partition.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cpp
-{
-namespace detail
-{
-
-template<typename ForwardIterator,
-         typename Predicate>
-  ForwardIterator stable_partition(tag,
-                                   ForwardIterator first,
-                                   ForwardIterator last,
-                                   Predicate pred)
-{
-  return thrust::system::detail::internal::scalar::stable_partition(first, last, pred);
-}
-
-template<typename ForwardIterator,
-         typename InputIterator,
-         typename Predicate>
-  ForwardIterator stable_partition(tag,
-                                   ForwardIterator first,
-                                   ForwardIterator last,
-                                   InputIterator stencil,
-                                   Predicate pred)
-{
-  return thrust::system::detail::internal::scalar::stable_partition(first, last, stencil, pred);
-}
-
-template<typename InputIterator,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename Predicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    stable_partition_copy(tag,
-                          InputIterator first,
-                          InputIterator last,
-                          OutputIterator1 out_true,
-                          OutputIterator2 out_false,
-                          Predicate pred)
-{
-  return thrust::system::detail::internal::scalar::stable_partition_copy(first, last, out_true, out_false, pred);
-}
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename Predicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    stable_partition_copy(tag,
-                          InputIterator1 first,
-                          InputIterator1 last,
-                          InputIterator2 stencil,
-                          OutputIterator1 out_true,
-                          OutputIterator2 out_false,
-                          Predicate pred)
-{
-  return thrust::system::detail::internal::scalar::stable_partition_copy(first, last, stencil, out_true, out_false, pred);
-}
-
-} // end namespace detail
-} // end namespace cpp
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cpp/detail/reduce.h b/compat/thrust/system/cpp/detail/reduce.h
deleted file mode 100644
index 5428206ba3..0000000000
--- a/compat/thrust/system/cpp/detail/reduce.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file reduce.h
- *  \brief C++ implementation of reduce algorithms.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cpp/detail/execution_policy.h>
-#include <thrust/system/detail/internal/scalar/reduce.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cpp
-{
-namespace detail
-{
-
-template<typename DerivedPolicy,
-         typename InputIterator, 
-         typename OutputType,
-         typename BinaryFunction>
-  OutputType reduce(execution_policy<DerivedPolicy> &,
-                    InputIterator begin,
-                    InputIterator end,
-                    OutputType init,
-                    BinaryFunction binary_op)
-{
-  return thrust::system::detail::internal::scalar::reduce(begin, end, init, binary_op);
-}
-
-} // end namespace detail
-} // end namespace cpp
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cpp/detail/reduce_by_key.h b/compat/thrust/system/cpp/detail/reduce_by_key.h
deleted file mode 100644
index 22dc2d9d3d..0000000000
--- a/compat/thrust/system/cpp/detail/reduce_by_key.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cpp/detail/execution_policy.h>
-#include <thrust/pair.h>
-#include <thrust/system/detail/internal/scalar/reduce_by_key.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cpp
-{
-namespace detail
-{
-
-template <typename DerivedPolicy,
-          typename InputIterator1,
-          typename InputIterator2,
-          typename OutputIterator1,
-          typename OutputIterator2,
-          typename BinaryPredicate,
-          typename BinaryFunction>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    reduce_by_key(execution_policy<DerivedPolicy> &,
-                  InputIterator1 keys_first, 
-                  InputIterator1 keys_last,
-                  InputIterator2 values_first,
-                  OutputIterator1 keys_output,
-                  OutputIterator2 values_output,
-                  BinaryPredicate binary_pred,
-                  BinaryFunction binary_op)
-{
-  return thrust::system::detail::internal::scalar::reduce_by_key(keys_first, keys_last, values_first, keys_output, values_output, binary_pred, binary_op);
-}
-
-} // end namespace detail
-} // end namespace cpp
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cpp/detail/remove.h b/compat/thrust/system/cpp/detail/remove.h
deleted file mode 100644
index cf2202bee3..0000000000
--- a/compat/thrust/system/cpp/detail/remove.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cpp/detail/execution_policy.h>
-#include <thrust/system/detail/internal/scalar/remove.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cpp
-{
-namespace detail
-{
-
-template<typename ForwardIterator,
-         typename Predicate>
-  ForwardIterator remove_if(tag,
-                            ForwardIterator first,
-                            ForwardIterator last,
-                            Predicate pred)
-{
-  return thrust::system::detail::internal::scalar::remove_if(first, last, pred);
-}
-
-
-template<typename ForwardIterator,
-         typename InputIterator,
-         typename Predicate>
-  ForwardIterator remove_if(tag,
-                            ForwardIterator first,
-                            ForwardIterator last,
-                            InputIterator stencil,
-                            Predicate pred)
-{
-  return thrust::system::detail::internal::scalar::remove_if(first, last, stencil, pred);
-}
-
-
-template<typename InputIterator,
-         typename OutputIterator,
-         typename Predicate>
-  OutputIterator remove_copy_if(tag,
-                                InputIterator first,
-                                InputIterator last,
-                                OutputIterator result,
-                                Predicate pred)
-{
-  return thrust::system::detail::internal::scalar::remove_copy_if(first, last, result, pred);
-}
-
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename Predicate>
-  OutputIterator remove_copy_if(tag,
-                                InputIterator1 first,
-                                InputIterator1 last,
-                                InputIterator2 stencil,
-                                OutputIterator result,
-                                Predicate pred)
-{
-  return thrust::system::detail::internal::scalar::remove_copy_if(first, last, stencil, result, pred);
-}
-
-} // end namespace detail
-} // end namespace cpp
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cpp/detail/replace.h b/compat/thrust/system/cpp/detail/replace.h
deleted file mode 100644
index a307fc5fc8..0000000000
--- a/compat/thrust/system/cpp/detail/replace.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system has no special version of this algorithm 
-
diff --git a/compat/thrust/system/cpp/detail/reverse.h b/compat/thrust/system/cpp/detail/reverse.h
deleted file mode 100644
index a307fc5fc8..0000000000
--- a/compat/thrust/system/cpp/detail/reverse.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system has no special version of this algorithm 
-
diff --git a/compat/thrust/system/cpp/detail/scan.h b/compat/thrust/system/cpp/detail/scan.h
deleted file mode 100644
index d4bae1e739..0000000000
--- a/compat/thrust/system/cpp/detail/scan.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file scan.h
- *  \brief C++ implementations of scan functions.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cpp/detail/execution_policy.h>
-#include <thrust/system/detail/internal/scalar/scan.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cpp
-{
-namespace detail
-{
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename BinaryFunction>
-  OutputIterator inclusive_scan(execution_policy<DerivedPolicy> &,
-                                InputIterator first,
-                                InputIterator last,
-                                OutputIterator result,
-                                BinaryFunction binary_op)
-{
-  return thrust::system::detail::internal::scalar::inclusive_scan(first, last, result, binary_op);
-}
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename T,
-         typename BinaryFunction>
-  OutputIterator exclusive_scan(execution_policy<DerivedPolicy> &,
-                                InputIterator first,
-                                InputIterator last,
-                                OutputIterator result,
-                                T init,
-                                BinaryFunction binary_op)
-{
-  return thrust::system::detail::internal::scalar::exclusive_scan(first, last, result, init, binary_op);
-}
-
-} // end namespace detail
-} // end namespace cpp
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cpp/detail/scan_by_key.h b/compat/thrust/system/cpp/detail/scan_by_key.h
deleted file mode 100644
index 4165d842fd..0000000000
--- a/compat/thrust/system/cpp/detail/scan_by_key.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cpp/detail/execution_policy.h>
-#include <thrust/system/detail/internal/scalar/scan_by_key.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cpp
-{
-namespace detail
-{
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename BinaryPredicate,
-         typename BinaryFunction>
-  OutputIterator inclusive_scan_by_key(tag,
-                                       InputIterator1 first1,
-                                       InputIterator1 last1,
-                                       InputIterator2 first2,
-                                       OutputIterator result,
-                                       BinaryPredicate binary_pred,
-                                       BinaryFunction binary_op)
-{
-  return thrust::system::detail::internal::scalar::inclusive_scan_by_key(first1, last1, first2, result, binary_pred, binary_op);
-}
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename T,
-         typename BinaryPredicate,
-         typename BinaryFunction>
-  OutputIterator exclusive_scan_by_key(tag,
-                                       InputIterator1 first1,
-                                       InputIterator1 last1,
-                                       InputIterator2 first2,
-                                       OutputIterator result,
-                                       T init,
-                                       BinaryPredicate binary_pred,
-                                       BinaryFunction binary_op)
-{
-  return thrust::system::detail::internal::scalar::exclusive_scan_by_key(first1, last1, first2, result, init, binary_pred, binary_op);
-}
-
-} // end namespace detail
-} // end namespace cpp
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cpp/detail/scatter.h b/compat/thrust/system/cpp/detail/scatter.h
deleted file mode 100644
index a307fc5fc8..0000000000
--- a/compat/thrust/system/cpp/detail/scatter.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system has no special version of this algorithm 
-
diff --git a/compat/thrust/system/cpp/detail/sequence.h b/compat/thrust/system/cpp/detail/sequence.h
deleted file mode 100644
index a307fc5fc8..0000000000
--- a/compat/thrust/system/cpp/detail/sequence.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system has no special version of this algorithm 
-
diff --git a/compat/thrust/system/cpp/detail/set_operations.h b/compat/thrust/system/cpp/detail/set_operations.h
deleted file mode 100644
index 07ce71257b..0000000000
--- a/compat/thrust/system/cpp/detail/set_operations.h
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cpp/detail/execution_policy.h>
-#include <thrust/system/detail/internal/scalar/set_operations.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cpp
-{
-namespace detail
-{
-
-
-template<typename ExecutionPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename StrictWeakOrdering>
-  OutputIterator set_difference(execution_policy<ExecutionPolicy> &,
-                                InputIterator1 first1,
-                                InputIterator1 last1,
-                                InputIterator2 first2,
-                                InputIterator2 last2,
-                                OutputIterator result,
-                                StrictWeakOrdering comp)
-{
-  return thrust::system::detail::internal::scalar::set_difference(first1, last1, first2, last2, result, comp);
-}
-
-
-template<typename ExecutionPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename StrictWeakOrdering>
-  OutputIterator set_intersection(execution_policy<ExecutionPolicy> &,
-                                  InputIterator1 first1,
-                                  InputIterator1 last1,
-                                  InputIterator2 first2,
-                                  InputIterator2 last2,
-                                  OutputIterator result,
-                                  StrictWeakOrdering comp)
-{
-  return thrust::system::detail::internal::scalar::set_intersection(first1, last1, first2, last2, result, comp);
-}
-
-
-template<typename ExecutionPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename StrictWeakOrdering>
-  OutputIterator set_symmetric_difference(execution_policy<ExecutionPolicy> &,
-                                          InputIterator1 first1,
-                                          InputIterator1 last1,
-                                          InputIterator2 first2,
-                                          InputIterator2 last2,
-                                          OutputIterator result,
-                                          StrictWeakOrdering comp)
-{
-  return thrust::system::detail::internal::scalar::set_symmetric_difference(first1, last1, first2, last2, result, comp);
-}
-
-
-template<typename ExecutionPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename StrictWeakOrdering>
-  OutputIterator set_union(execution_policy<ExecutionPolicy> &,
-                           InputIterator1 first1,
-                           InputIterator1 last1,
-                           InputIterator2 first2,
-                           InputIterator2 last2,
-                           OutputIterator result,
-                           StrictWeakOrdering comp)
-{
-  return thrust::system::detail::internal::scalar::set_union(first1, last1, first2, last2, result, comp);
-}
-
-
-} // end namespace detail
-} // end namespace cpp
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cpp/detail/sort.h b/compat/thrust/system/cpp/detail/sort.h
deleted file mode 100644
index 60244e22a3..0000000000
--- a/compat/thrust/system/cpp/detail/sort.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cpp/detail/execution_policy.h>
-#include <thrust/system/detail/internal/scalar/sort.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cpp
-{
-namespace detail
-{
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator,
-         typename StrictWeakOrdering>
-  void stable_sort(execution_policy<DerivedPolicy> &,
-                   RandomAccessIterator first,
-                   RandomAccessIterator last,
-                   StrictWeakOrdering comp)
-{
-  thrust::system::detail::internal::scalar::stable_sort(first, last, comp);
-}
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename StrictWeakOrdering>
-  void stable_sort_by_key(execution_policy<DerivedPolicy> &,
-                          RandomAccessIterator1 keys_first,
-                          RandomAccessIterator1 keys_last,
-                          RandomAccessIterator2 values_first,
-                          StrictWeakOrdering comp)
-{
-  thrust::system::detail::internal::scalar::stable_sort_by_key(keys_first, keys_last, values_first, comp);
-}
-
-} // end namespace detail
-} // end namespace cpp
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cpp/detail/swap_ranges.h b/compat/thrust/system/cpp/detail/swap_ranges.h
deleted file mode 100644
index a834a2c0ed..0000000000
--- a/compat/thrust/system/cpp/detail/swap_ranges.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// cpp has no special swap_ranges
-
diff --git a/compat/thrust/system/cpp/detail/tabulate.h b/compat/thrust/system/cpp/detail/tabulate.h
deleted file mode 100644
index a307fc5fc8..0000000000
--- a/compat/thrust/system/cpp/detail/tabulate.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system has no special version of this algorithm 
-
diff --git a/compat/thrust/system/cpp/detail/temporary_buffer.h b/compat/thrust/system/cpp/detail/temporary_buffer.h
deleted file mode 100644
index 628bd75719..0000000000
--- a/compat/thrust/system/cpp/detail/temporary_buffer.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system has no special temporary buffer functions
-
diff --git a/compat/thrust/system/cpp/detail/transform.h b/compat/thrust/system/cpp/detail/transform.h
deleted file mode 100644
index 5909d4a4fb..0000000000
--- a/compat/thrust/system/cpp/detail/transform.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// cpp has no special transform
-
diff --git a/compat/thrust/system/cpp/detail/transform_reduce.h b/compat/thrust/system/cpp/detail/transform_reduce.h
deleted file mode 100644
index a307fc5fc8..0000000000
--- a/compat/thrust/system/cpp/detail/transform_reduce.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system has no special version of this algorithm 
-
diff --git a/compat/thrust/system/cpp/detail/transform_scan.h b/compat/thrust/system/cpp/detail/transform_scan.h
deleted file mode 100644
index a307fc5fc8..0000000000
--- a/compat/thrust/system/cpp/detail/transform_scan.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system has no special version of this algorithm 
-
diff --git a/compat/thrust/system/cpp/detail/uninitialized_copy.h b/compat/thrust/system/cpp/detail/uninitialized_copy.h
deleted file mode 100644
index a307fc5fc8..0000000000
--- a/compat/thrust/system/cpp/detail/uninitialized_copy.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system has no special version of this algorithm 
-
diff --git a/compat/thrust/system/cpp/detail/uninitialized_fill.h b/compat/thrust/system/cpp/detail/uninitialized_fill.h
deleted file mode 100644
index a307fc5fc8..0000000000
--- a/compat/thrust/system/cpp/detail/uninitialized_fill.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system has no special version of this algorithm 
-
diff --git a/compat/thrust/system/cpp/detail/unique.h b/compat/thrust/system/cpp/detail/unique.h
deleted file mode 100644
index cf740498e1..0000000000
--- a/compat/thrust/system/cpp/detail/unique.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cpp/detail/execution_policy.h>
-#include <thrust/pair.h>
-#include <thrust/system/detail/internal/scalar/unique.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cpp
-{
-namespace detail
-{
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename BinaryPredicate>
-  ForwardIterator unique(execution_policy<DerivedPolicy> &,
-                         ForwardIterator first,
-                         ForwardIterator last,
-                         BinaryPredicate binary_pred)
-{
-  return thrust::system::detail::internal::scalar::unique(first, last, binary_pred);
-}
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename BinaryPredicate>
-  OutputIterator unique_copy(execution_policy<DerivedPolicy> &,
-                             InputIterator first,
-                             InputIterator last,
-                             OutputIterator output,
-                             BinaryPredicate binary_pred)
-{
-  return thrust::system::detail::internal::scalar::unique_copy(first, last, output, binary_pred);
-}
-
-} // end namespace detail
-} // end namespace cpp 
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cpp/detail/unique_by_key.h b/compat/thrust/system/cpp/detail/unique_by_key.h
deleted file mode 100644
index a9f13d6a27..0000000000
--- a/compat/thrust/system/cpp/detail/unique_by_key.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cpp/detail/execution_policy.h>
-#include <thrust/pair.h>
-#include <thrust/system/detail/internal/scalar/unique_by_key.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cpp
-{
-namespace detail
-{
-
-template<typename DerivedPolicy,
-         typename ForwardIterator1,
-         typename ForwardIterator2,
-         typename BinaryPredicate>
-  thrust::pair<ForwardIterator1,ForwardIterator2>
-    unique_by_key(execution_policy<DerivedPolicy> &,
-                  ForwardIterator1 keys_first, 
-                  ForwardIterator1 keys_last,
-                  ForwardIterator2 values_first,
-                  BinaryPredicate binary_pred)
-{
-  return thrust::system::detail::internal::scalar::unique_by_key(keys_first, keys_last, values_first, binary_pred);
-}
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename BinaryPredicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    unique_by_key_copy(execution_policy<DerivedPolicy> &,
-                       InputIterator1 keys_first, 
-                       InputIterator1 keys_last,
-                       InputIterator2 values_first,
-                       OutputIterator1 keys_output,
-                       OutputIterator2 values_output,
-                       BinaryPredicate binary_pred)
-{
-  return thrust::system::detail::internal::scalar::unique_by_key_copy(keys_first, keys_last, values_first, keys_output, values_output, binary_pred);
-}
-
-} // end namespace detail
-} // end namespace cpp 
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cpp/detail/vector.inl b/compat/thrust/system/cpp/detail/vector.inl
deleted file mode 100644
index 03bffcd8aa..0000000000
--- a/compat/thrust/system/cpp/detail/vector.inl
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cpp/vector.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cpp
-{
-
-template<typename T, typename Allocator>
-  vector<T,Allocator>
-    ::vector()
-      : super_t()
-{}
-
-template<typename T, typename Allocator>
-  vector<T,Allocator>
-    ::vector(size_type n)
-      : super_t(n)
-{}
-
-template<typename T, typename Allocator>
-  vector<T,Allocator>
-    ::vector(size_type n, const value_type &value)
-      : super_t(n,value)
-{}
-
-template<typename T, typename Allocator>
-  vector<T,Allocator>
-    ::vector(const vector &x)
-      : super_t(x)
-{}
-
-template<typename T, typename Allocator>
-  template<typename OtherT, typename OtherAllocator>
-    vector<T,Allocator>
-      ::vector(const thrust::detail::vector_base<OtherT,OtherAllocator> &x)
-        : super_t(x)
-{}
-
-template<typename T, typename Allocator>
-  template<typename OtherT, typename OtherAllocator>
-    vector<T,Allocator>
-      ::vector(const std::vector<OtherT,OtherAllocator> &x)
-        : super_t(x)
-{}
-
-template<typename T, typename Allocator>
-  template<typename InputIterator>
-    vector<T,Allocator>
-      ::vector(InputIterator first, InputIterator last)
-        : super_t(first,last)
-{}
-
-template<typename T, typename Allocator>
-  template<typename OtherT, typename OtherAllocator>
-    vector<T,Allocator> &
-      vector<T,Allocator>
-        ::operator=(const std::vector<OtherT,OtherAllocator> &x)
-{
-  super_t::operator=(x);
-  return *this;
-}
-
-template<typename T, typename Allocator>
-  template<typename OtherT, typename OtherAllocator>
-    vector<T,Allocator> &
-      vector<T,Allocator>
-        ::operator=(const thrust::detail::vector_base<OtherT,OtherAllocator> &x)
-{
-  super_t::operator=(x);
-  return *this;
-}
-      
-} // end cpp
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/cpp/execution_policy.h b/compat/thrust/system/cpp/execution_policy.h
deleted file mode 100644
index f192eb9659..0000000000
--- a/compat/thrust/system/cpp/execution_policy.h
+++ /dev/null
@@ -1,157 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-/*! \file thrust/system/cpp/execution_policy.h
- *  \brief Execution policies for Thrust's standard C++ system.
- */
-
-#include <thrust/detail/config.h>
-
-// get the execution policies definitions first
-#include <thrust/system/cpp/detail/execution_policy.h>
-
-// get the definition of par
-#include <thrust/system/cpp/detail/par.h>
-
-// now get all the algorithm definitions
-
-#include <thrust/system/cpp/detail/adjacent_difference.h>
-#include <thrust/system/cpp/detail/assign_value.h>
-#include <thrust/system/cpp/detail/binary_search.h>
-#include <thrust/system/cpp/detail/copy.h>
-#include <thrust/system/cpp/detail/copy_if.h>
-#include <thrust/system/cpp/detail/count.h>
-#include <thrust/system/cpp/detail/equal.h>
-#include <thrust/system/cpp/detail/extrema.h>
-#include <thrust/system/cpp/detail/fill.h>
-#include <thrust/system/cpp/detail/find.h>
-#include <thrust/system/cpp/detail/for_each.h>
-#include <thrust/system/cpp/detail/gather.h>
-#include <thrust/system/cpp/detail/generate.h>
-#include <thrust/system/cpp/detail/get_value.h>
-#include <thrust/system/cpp/detail/inner_product.h>
-#include <thrust/system/cpp/detail/iter_swap.h>
-#include <thrust/system/cpp/detail/logical.h>
-#include <thrust/system/cpp/detail/malloc_and_free.h>
-#include <thrust/system/cpp/detail/merge.h>
-#include <thrust/system/cpp/detail/mismatch.h>
-#include <thrust/system/cpp/detail/partition.h>
-#include <thrust/system/cpp/detail/reduce.h>
-#include <thrust/system/cpp/detail/reduce_by_key.h>
-#include <thrust/system/cpp/detail/remove.h>
-#include <thrust/system/cpp/detail/replace.h>
-#include <thrust/system/cpp/detail/reverse.h>
-#include <thrust/system/cpp/detail/scan.h>
-#include <thrust/system/cpp/detail/scan_by_key.h>
-#include <thrust/system/cpp/detail/scatter.h>
-#include <thrust/system/cpp/detail/sequence.h>
-#include <thrust/system/cpp/detail/set_operations.h>
-#include <thrust/system/cpp/detail/sort.h>
-#include <thrust/system/cpp/detail/swap_ranges.h>
-#include <thrust/system/cpp/detail/tabulate.h>
-#include <thrust/system/cpp/detail/transform.h>
-#include <thrust/system/cpp/detail/transform_reduce.h>
-#include <thrust/system/cpp/detail/transform_scan.h>
-#include <thrust/system/cpp/detail/uninitialized_copy.h>
-#include <thrust/system/cpp/detail/uninitialized_fill.h>
-#include <thrust/system/cpp/detail/unique.h>
-#include <thrust/system/cpp/detail/unique_by_key.h>
-
-
-// define these entities here for the purpose of Doxygenating them
-// they are actually defined elsewhere
-#if 0
-namespace thrust
-{
-namespace system
-{
-namespace cpp
-{
-
-
-/*! \addtogroup execution_policies
- *  \{
- */
-
-
-/*! \p thrust::system::cpp::execution_policy is the base class for all Thrust parallel execution
- *  policies which are derived from Thrust's standard C++ backend system.
- */
-template<typename DerivedPolicy>
-struct execution_policy : thrust::execution_policy<DerivedPolicy>
-{};
-
-
-/*! \p thrust::system::cpp::tag is a type representing Thrust's standard C++ backend system in C++'s type system.
- *  Iterators "tagged" with a type which is convertible to \p cpp::tag assert that they may be
- *  "dispatched" to algorithm implementations in the \p cpp system.
- */
-struct tag : thrust::system::cpp::execution_policy<tag> { unspecified };
-
-
-/*! 
- *  \p thrust::system::cpp::par is the parallel execution policy associated with Thrust's standard
- *  C++ backend system.
- *
- *  Instead of relying on implicit algorithm dispatch through iterator system tags, users may
- *  directly target Thrust's C++ backend system by providing \p thrust::cpp::par as an algorithm
- *  parameter.
- *
- *  Explicit dispatch can be useful in avoiding the introduction of data copies into containers such
- *  as \p thrust::cpp::vector.
- *
- *  The type of \p thrust::cpp::par is implementation-defined.
- *
- *  The following code snippet demonstrates how to use \p thrust::cpp::par to explicitly dispatch an
- *  invocation of \p thrust::for_each to the standard C++ backend system:
- *
- *  \code
- *  #include <thrust/for_each.h>
- *  #include <thrust/system/cpp/execution_policy.h>
- *  #include <cstdio>
- *
- *  struct printf_functor
- *  {
- *    __host__ __device__
- *    void operator()(int x)
- *    {
- *      printf("%d\n");
- *    }
- *  };
- *  ...
- *  int vec[3];
- *  vec[0] = 0; vec[1] = 1; vec[2] = 2;
- *
- *  thrust::for_each(thrust::cpp::par, vec.begin(), vec.end(), printf_functor());
- *
- *  // 0 1 2 is printed to standard output in some unspecified order
- *  \endcode
- */
-static const unspecified par;
-
-
-/*! \}
- */
-
-
-} // end cpp
-} // end system
-} // end thrust
-#endif
-
-
diff --git a/compat/thrust/system/cpp/memory.h b/compat/thrust/system/cpp/memory.h
deleted file mode 100644
index f3a58b8c32..0000000000
--- a/compat/thrust/system/cpp/memory.h
+++ /dev/null
@@ -1,414 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-/*! \file thrust/system/cpp/memory.h
- *  \brief Managing memory associated with Thrust's standard C++ system.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cpp/detail/execution_policy.h>
-#include <thrust/memory.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/detail/allocator/malloc_allocator.h>
-#include <ostream>
-
-namespace thrust
-{
-namespace system
-{
-namespace cpp
-{
-
-template<typename> class pointer;
-
-} // end cpp
-} // end system
-} // end thrust
-
-
-/*! \cond
- */
-
-// specialize std::iterator_traits to avoid problems with the name of
-// pointer's constructor shadowing its nested pointer type
-// do this before pointer is defined so the specialization is correctly
-// used inside the definition
-namespace std
-{
-
-template<typename Element>
-  struct iterator_traits<thrust::system::cpp::pointer<Element> >
-{
-  private:
-    typedef thrust::system::cpp::pointer<Element> ptr;
-
-  public:
-    typedef typename ptr::iterator_category       iterator_category;
-    typedef typename ptr::value_type              value_type;
-    typedef typename ptr::difference_type         difference_type;
-    typedef ptr                                   pointer;
-    typedef typename ptr::reference               reference;
-}; // end iterator_traits
-
-} // end std
-
-/*! \endcond
- */
-
-
-namespace thrust
-{
-namespace system
-{
-
-/*! \addtogroup system_backends Systems
- *  \ingroup system
- *  \{
- */
-
-/*! \namespace thrust::system::cpp
- *  \brief \p thrust::system::cpp is the namespace containing functionality for allocating, manipulating,
- *         and deallocating memory available to Thrust's standard C++ backend system.
- *         The identifiers are provided in a separate namespace underneath <tt>thrust::system</tt>
- *         for import convenience but are also aliased in the top-level <tt>thrust::cpp</tt>
- *         namespace for easy access.
- *
- */
-namespace cpp
-{
-
-// forward declaration of reference for pointer
-template<typename Element> class reference;
-
-/*! \cond
- */
-
-// XXX nvcc + msvc have trouble instantiating reference below
-//     this is a workaround
-namespace detail
-{
-
-template<typename Element>
-  struct reference_msvc_workaround
-{
-  typedef thrust::system::cpp::reference<Element> type;
-}; // end reference_msvc_workaround
-
-} // end detail
-
-/*! \endcond
- */
-
-
-/*! \p pointer stores a pointer to an object allocated in memory available to the cpp system.
- *  This type provides type safety when dispatching standard algorithms on ranges resident
- *  in cpp memory.
- *
- *  \p pointer has pointer semantics: it may be dereferenced and manipulated with pointer arithmetic.
- *
- *  \p pointer can be created with the function \p cpp::malloc, or by explicitly calling its constructor
- *  with a raw pointer.
- *
- *  The raw pointer encapsulated by a \p pointer may be obtained by eiter its <tt>get</tt> member function
- *  or the \p raw_pointer_cast function.
- *
- *  \note \p pointer is not a "smart" pointer; it is the programmer's responsibility to deallocate memory
- *  pointed to by \p pointer.
- *
- *  \tparam T specifies the type of the pointee.
- *
- *  \see cpp::malloc
- *  \see cpp::free
- *  \see raw_pointer_cast
- */
-template<typename T>
-  class pointer
-    : public thrust::pointer<
-               T,
-               thrust::system::cpp::tag,
-               thrust::system::cpp::reference<T>,
-               thrust::system::cpp::pointer<T>
-             >
-{
-  /*! \cond
-   */
-
-  private:
-    typedef thrust::pointer<
-      T,
-      thrust::system::cpp::tag,
-      //thrust::system::cpp::reference<T>,
-      typename detail::reference_msvc_workaround<T>::type,
-      thrust::system::cpp::pointer<T>
-    > super_t;
-
-  /*! \endcond
-   */
-
-  public:
-    // note that cpp::pointer's member functions need __host__ __device__
-    // to interoperate with nvcc + iterators' dereference member function
-
-    /*! \p pointer's no-argument constructor initializes its encapsulated pointer to \c 0.
-     */
-    __host__ __device__
-    pointer() : super_t() {}
-
-    /*! This constructor allows construction of a <tt>pointer<const T></tt> from a <tt>T*</tt>.
-     *
-     *  \param ptr A raw pointer to copy from, presumed to point to a location in memory
-     *         accessible by the \p cpp system.
-     *  \tparam OtherT \p OtherT shall be convertible to \p T.
-     */
-    template<typename OtherT>
-    __host__ __device__
-    explicit pointer(OtherT *ptr) : super_t(ptr) {}
-
-    /*! This constructor allows construction from another pointer-like object with related type.
-     *
-     *  \param other The \p OtherPointer to copy.
-     *  \tparam OtherPointer The system tag associated with \p OtherPointer shall be convertible
-     *          to \p thrust::system::cpp::tag and its element type shall be convertible to \p T.
-     */
-    template<typename OtherPointer>
-    __host__ __device__
-    pointer(const OtherPointer &other,
-            typename thrust::detail::enable_if_pointer_is_convertible<
-              OtherPointer,
-              pointer
-            >::type * = 0) : super_t(other) {}
-
-    /*! Assignment operator allows assigning from another pointer-like object with related type.
-     *
-     *  \param other The other pointer-like object to assign from.
-     *  \tparam OtherPointer The system tag associated with \p OtherPointer shall be convertible
-     *          to \p thrust::system::cpp::tag and its element type shall be convertible to \p T.
-     */
-    template<typename OtherPointer>
-    __host__ __device__
-    typename thrust::detail::enable_if_pointer_is_convertible<
-      OtherPointer,
-      pointer,
-      pointer &
-    >::type
-    operator=(const OtherPointer &other)
-    {
-      return super_t::operator=(other);
-    }
-}; // end pointer
-
-
-/*! \p reference is a wrapped reference to an object stored in memory available to the \p cpp system.
- *  \p reference is the type of the result of dereferencing a \p cpp::pointer.
- *
- *  \tparam T Specifies the type of the referenced object.
- */
-template<typename T>
-  class reference
-    : public thrust::reference<
-               T,
-               thrust::system::cpp::pointer<T>,
-               thrust::system::cpp::reference<T>
-             >
-{
-  /*! \cond
-   */
-
-  private:
-    typedef thrust::reference<
-      T,
-      thrust::system::cpp::pointer<T>,
-      thrust::system::cpp::reference<T>
-    > super_t;
-
-  /*! \endcond
-   */
-
-  public:
-    /*! \cond
-     */
-
-    typedef typename super_t::value_type value_type;
-    typedef typename super_t::pointer    pointer;
-
-    /*! \endcond
-     */
-
-    /*! This constructor initializes this \p reference to refer to an object
-     *  pointed to by the given \p pointer. After this \p reference is constructed,
-     *  it shall refer to the object pointed to by \p ptr.
-     *
-     *  \param ptr A \p pointer to copy from.
-     */
-    __host__ __device__
-    explicit reference(const pointer &ptr)
-      : super_t(ptr)
-    {}
-
-    /*! This constructor accepts a const reference to another \p reference of related type.
-     *  After this \p reference is constructed, it shall refer to the same object as \p other.
-     *
-     *  \param other A \p reference to copy from.
-     *  \tparam OtherT The element type of the other \p reference.
-     *
-     *  \note This constructor is templated primarily to allow initialization of <tt>reference<const T></tt>
-     *        from <tt>reference<T></tt>.
-     */
-    template<typename OtherT>
-    __host__ __device__
-    reference(const reference<OtherT> &other,
-              typename thrust::detail::enable_if_convertible<
-                typename reference<OtherT>::pointer,
-                pointer
-              >::type * = 0)
-      : super_t(other)
-    {}
-
-    /*! Copy assignment operator copy assigns from another \p reference of related type.
-     *
-     *  \param other The other \p reference to assign from.
-     *  \return <tt>*this</tt>
-     *  \tparam OtherT The element type of the other \p reference.
-     */
-    template<typename OtherT>
-    reference &operator=(const reference<OtherT> &other);
-
-    /*! Assignment operator assigns from a \p value_type.
-     *
-     *  \param x The \p value_type to assign from.
-     *  \return <tt>*this</tt>
-     */
-    reference &operator=(const value_type &x);
-}; // end reference
-
-/*! Exchanges the values of two objects referred to by \p reference.
- *  \p x The first \p reference of interest.
- *  \p y The second \p reference ot interest.
- */
-template<typename T>
-__host__ __device__
-void swap(reference<T> x, reference<T> y);
-
-/*! Allocates an area of memory available to Thrust's <tt>cpp</tt> system.
- *  \param n Number of bytes to allocate.
- *  \return A <tt>cpp::pointer<void></tt> pointing to the beginning of the newly
- *          allocated memory. A null <tt>cpp::pointer<void></tt> is returned if
- *          an error occurs.
- *  \note The <tt>cpp::pointer<void></tt> returned by this function must be
- *        deallocated with \p cpp::free.
- *  \see cpp::free
- *  \see std::malloc
- */
-inline pointer<void> malloc(std::size_t n);
-
-/*! Allocates a typed area of memory available to Thrust's <tt>cpp</tt> system.
- *  \param n Number of elements to allocate.
- *  \return A <tt>cpp::pointer<T></tt> pointing to the beginning of the newly
- *          allocated elements. A null <tt>cpp::pointer<T></tt> is returned if
- *          an error occurs.
- *  \note The <tt>cpp::pointer<T></tt> returned by this function must be
- *        deallocated with \p cpp::free.
- *  \see cpp::free
- *  \see std::malloc
- */
-template<typename T>
-inline pointer<T> malloc(std::size_t n);
-
-/*! Deallocates an area of memory previously allocated by <tt>cpp::malloc</tt>.
- *  \param ptr A <tt>cpp::pointer<void></tt> pointing to the beginning of an area
- *         of memory previously allocated with <tt>cpp::malloc</tt>.
- *  \see cpp::malloc
- *  \see std::free
- */
-inline void free(pointer<void> ptr);
-
-// XXX upon c++11
-// template<typename T> using allocator = thrust::detail::malloc_allocator<T,tag,pointer<T> >;
-
-/*! \p cpp::allocator is the default allocator used by the \p cpp system's containers such as
- *  <tt>cpp::vector</tt> if no user-specified allocator is provided. \p cpp::allocator allocates
- *  (deallocates) storage with \p cpp::malloc (\p cpp::free).
- */
-template<typename T>
-  struct allocator
-    : thrust::detail::malloc_allocator<
-        T,
-        tag,
-        pointer<T>
-      >
-{
-  /*! The \p rebind metafunction provides the type of an \p allocator
-   *  instantiated with another type.
-   *
-   *  \tparam U The other type to use for instantiation.
-   */
-  template<typename U>
-    struct rebind
-  {
-    /*! The typedef \p other gives the type of the rebound \p allocator.
-     */
-    typedef allocator<U> other;
-  };
-
-  /*! No-argument constructor has no effect.
-   */
-  __host__ __device__
-  inline allocator() {}
-
-  /*! Copy constructor has no effect.
-   */
-  __host__ __device__
-  inline allocator(const allocator &) {}
-
-  /*! Constructor from other \p allocator has no effect.
-   */
-  template<typename U>
-  __host__ __device__
-  inline allocator(const allocator<U> &) {}
-
-  /*! Destructor has no effect.
-   */
-  __host__ __device__
-  inline ~allocator() {}
-}; // end allocator
-
-} // end cpp
-
-/*! \}
- */
-
-} // end system
-
-/*! \namespace thrust::cpp
- *  \brief \p thrust::cpp is a top-level alias for thrust::system::cpp.
- */
-namespace cpp
-{
-
-using thrust::system::cpp::pointer;
-using thrust::system::cpp::reference;
-using thrust::system::cpp::malloc;
-using thrust::system::cpp::free;
-using thrust::system::cpp::allocator;
-
-} // end cpp
-
-} // end thrust
-
-#include <thrust/system/cpp/detail/memory.inl>
-
diff --git a/compat/thrust/system/cpp/vector.h b/compat/thrust/system/cpp/vector.h
deleted file mode 100644
index 4282df991a..0000000000
--- a/compat/thrust/system/cpp/vector.h
+++ /dev/null
@@ -1,149 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-/*! \file thrust/system/cpp/vector.h
- *  \brief A dynamically-sizable array of elements which reside in memory available to
- *         Thrust's standard C++ system.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cpp/memory.h>
-#include <thrust/detail/vector_base.h>
-#include <vector>
-
-namespace thrust
-{
-
-// forward declaration of host_vector
-template<typename T, typename Allocator> class host_vector;
-
-namespace system
-{
-namespace cpp
-{
-
-// XXX upon c++11
-// template<typename T, typename Allocator = allocator<T> > using vector = thrust::detail::vector_base<T,Allocator>;
-
-/*! \p cpp::vector is a container that supports random access to elements,
- *  constant time removal of elements at the end, and linear time insertion
- *  and removal of elements at the beginning or in the middle. The number of
- *  elements in a \p cpp::vector may vary dynamically; memory management is
- *  automatic. The elements contained in a \p cpp::vector reside in memory
- *  available to the \p cpp system.
- *
- *  \tparam T The element type of the \p cpp::vector.
- *  \tparam Allocator The allocator type of the \p cpp::vector. Defaults to \p cpp::allocator.
- *
- *  \see http://www.sgi.com/tech/stl/Vector.html
- *  \see host_vector For the documentation of the complete interface which is
- *                   shared by \p cpp::vector
- *  \see device_vector
- */
-template<typename T, typename Allocator = allocator<T> >
-  class vector
-    : public thrust::detail::vector_base<T,Allocator>
-{
-  /*! \cond
-   */
-  private:
-    typedef thrust::detail::vector_base<T,Allocator> super_t;
-  /*! \endcond
-   */
-
-  public:
-
-  /*! \cond
-   */
-    typedef typename super_t::size_type  size_type;
-    typedef typename super_t::value_type value_type;
-
-  /*! \endcond
-   */
-
-    /*! This constructor creates an empty \p cpp::vector.
-     */
-    vector();
-
-    /*! This constructor creates a \p cpp::vector with \p n default-constructed elements.
-     *  \param n The size of the \p cpp::vector to create.
-     */
-    explicit vector(size_type n);
-
-    /*! This constructor creates a \p cpp::vector with \p n copies of \p value.
-     *  \param n The size of the \p cpp::vector to create.
-     *  \param value An element to copy.
-     */
-    explicit vector(size_type n, const value_type &value);
-
-    /*! Copy constructor copies from another \p cpp::vector.
-     *  \param x The other \p cpp::vector to copy.
-     */
-    vector(const vector &x);
-
-    /*! This constructor copies from another Thrust vector-like object.
-     *  \param x The other object to copy from.
-     */
-    template<typename OtherT, typename OtherAllocator>
-    vector(const thrust::detail::vector_base<OtherT,OtherAllocator> &x);
-
-    /*! This constructor copies from a \c std::vector.
-     *  \param x The \c std::vector to copy from.
-     */
-    template<typename OtherT, typename OtherAllocator>
-    vector(const std::vector<OtherT,OtherAllocator> &x);
-
-    /*! This constructor creates a \p cpp::vector by copying from a range.
-     *  \param first The beginning of the range.
-     *  \param last The end of the range.
-     */
-    template<typename InputIterator>
-    vector(InputIterator first, InputIterator last);
-
-    // XXX vector_base should take a Derived type so we don't have to define these superfluous assigns
-
-    /*! Assignment operator assigns from a \c std::vector.
-     *  \param x The \c std::vector to assign from.
-     *  \return <tt>*this</tt>
-     */
-    template<typename OtherT, typename OtherAllocator>
-    vector &operator=(const std::vector<OtherT,OtherAllocator> &x);
-
-    /*! Assignment operator assigns from another Thrust vector-like object.
-     *  \param x The other object to assign from.
-     *  \return <tt>*this</tt>
-     */
-    template<typename OtherT, typename OtherAllocator>
-    vector &operator=(const thrust::detail::vector_base<OtherT,OtherAllocator> &x);
-}; // end vector
-
-} // end cpp
-} // end system
-
-// alias system::cpp names at top-level
-namespace cpp
-{
-
-using thrust::system::cpp::vector;
-
-} // end cpp
-
-} // end thrust
-
-#include <thrust/system/cpp/detail/vector.inl>
-
diff --git a/compat/thrust/system/cuda/detail/adjacent_difference.h b/compat/thrust/system/cuda/detail/adjacent_difference.h
deleted file mode 100644
index ec51794ff9..0000000000
--- a/compat/thrust/system/cuda/detail/adjacent_difference.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file adjacent_difference.h
- *  \brief CUDA implementation of adjacent_difference.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cuda/detail/execution_policy.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-
-template <typename DerivedPolicy,
-          typename InputIterator,
-          typename OutputIterator,
-          typename BinaryFunction>
-OutputIterator adjacent_difference(execution_policy<DerivedPolicy> &exec,
-                                   InputIterator first, InputIterator last,
-                                   OutputIterator result,
-                                   BinaryFunction binary_op);
-
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/cuda/detail/adjacent_difference.inl>
-
diff --git a/compat/thrust/system/cuda/detail/adjacent_difference.inl b/compat/thrust/system/cuda/detail/adjacent_difference.inl
deleted file mode 100644
index 9e4756a5a3..0000000000
--- a/compat/thrust/system/cuda/detail/adjacent_difference.inl
+++ /dev/null
@@ -1,197 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-
-#include <thrust/gather.h>
-#include <thrust/functional.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/iterator/counting_iterator.h>
-#include <thrust/iterator/transform_iterator.h>
-#include <thrust/detail/temporary_array.h>
-#include <thrust/system/detail/internal/decompose.h>
-#include <thrust/system/cuda/detail/default_decomposition.h>
-#include <thrust/system/cuda/detail/detail/launch_closure.h>
-#include <thrust/system/cuda/detail/detail/launch_calculator.h>
-#include <thrust/system/cuda/detail/execution_policy.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-namespace detail
-{
-
-template <typename Decomposition>
-struct last_index_in_each_interval : public thrust::unary_function<typename Decomposition::index_type, typename Decomposition::index_type>
-{
-  typedef typename Decomposition::index_type index_type;
-
-  Decomposition decomp;
-
-  last_index_in_each_interval(Decomposition decomp) : decomp(decomp) {}
-
-  __host__ __device__
-  index_type operator()(index_type interval)
-  {
-    return decomp[interval].end() - 1;
-  }
-};
-
-template <typename InputIterator1,
-          typename InputIterator2,
-          typename OutputIterator,
-          typename BinaryFunction,
-          typename Decomposition,
-          typename Context>
-struct adjacent_difference_closure
-{
-  InputIterator1 input;
-  InputIterator2 input_copy;
-  OutputIterator output;
-  BinaryFunction binary_op;
-  Decomposition  decomp;
-  Context        context;
-
-  typedef Context context_type;
-  
-  adjacent_difference_closure(InputIterator1 input,
-                              InputIterator2 input_copy,
-                              OutputIterator output,
-                              BinaryFunction binary_op,
-                              Decomposition  decomp,
-                              Context        context = Context())
-    : input(input), input_copy(input_copy), output(output), binary_op(binary_op), decomp(decomp), context(context) {}
-
-  __device__ __thrust_forceinline__
-  void operator()(void)
-  {
-    typedef typename thrust::iterator_value<InputIterator1>::type  InputType;
-    typedef typename Decomposition::index_type index_type;
-
-    // this block processes results in [range.begin(), range.end())
-    thrust::system::detail::internal::index_range<index_type> range = decomp[context.block_index()];
-    
-    input_copy += context.block_index() - 1;
-      
-    // prime the temp values for all threads so we don't need to launch a default constructor
-    InputType next_left = (context.block_index() == 0) ? *input : *input_copy;
-
-    index_type base = range.begin();
-    index_type i    = range.begin() + context.thread_index();
-    
-    if (i < range.end())
-    {
-      if (context.thread_index() > 0)
-      {
-        InputIterator1 temp = input + (i - 1);
-        next_left = *temp;
-      }              
-    }
-    
-    input  += i;
-    output += i;
-
-    while (base < range.end())
-    {
-      InputType curr_left = next_left;
-
-      if (i + context.block_dimension() < range.end())
-      {
-        InputIterator1 temp = input + (context.block_dimension() - 1);
-        next_left = *temp;
-      }
-
-      context.barrier();
-
-      if (i < range.end())
-      {
-        if (i == 0)
-          *output = *input;
-        else
-        {
-          InputType x = *input;
-          *output = binary_op(x, curr_left);
-        }
-      }
-
-      i      += context.block_dimension();
-      base   += context.block_dimension();
-      input  += context.block_dimension();
-      output += context.block_dimension();
-    }
-  }
-};
-
-} // end namespace detail
-
-
-__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_BEGIN
-
-template <typename DerivedPolicy,
-          typename InputIterator,
-          typename OutputIterator,
-          typename BinaryFunction>
-OutputIterator adjacent_difference(execution_policy<DerivedPolicy> &exec,
-                                   InputIterator first, InputIterator last,
-                                   OutputIterator result,
-                                   BinaryFunction binary_op)
-{
-  typedef typename thrust::iterator_value<InputIterator>::type                        InputType;
-  typedef typename thrust::iterator_difference<InputIterator>::type                   IndexType;
-  typedef          thrust::system::detail::internal::uniform_decomposition<IndexType> Decomposition;
-
-  IndexType n = last - first;
-
-  if (n == 0)
-    return result;
-
-  Decomposition decomp = default_decomposition(last - first);
-
-  // allocate temporary storage
-  thrust::detail::temporary_array<InputType,DerivedPolicy> temp(exec, decomp.size() - 1);
-
-  // gather last value in each interval
-  detail::last_index_in_each_interval<Decomposition> unary_op(decomp);
-  thrust::gather(exec,
-                 thrust::make_transform_iterator(thrust::counting_iterator<IndexType>(0), unary_op),
-                 thrust::make_transform_iterator(thrust::counting_iterator<IndexType>(0), unary_op) + (decomp.size() - 1),
-                 first,
-                 temp.begin());
-
-  
-  typedef typename thrust::detail::temporary_array<InputType,DerivedPolicy>::iterator InputIterator2;
-  typedef detail::blocked_thread_array Context;
-  typedef detail::adjacent_difference_closure<InputIterator,InputIterator2,OutputIterator,BinaryFunction,Decomposition,Context> Closure;
-
-  Closure closure(first, temp.begin(), result, binary_op, decomp); 
-
-  detail::launch_closure(closure, decomp.size());
-  
-  return result + n;
-}
-
-__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_END
-
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cuda/detail/assign_value.h b/compat/thrust/system/cuda/detail/assign_value.h
deleted file mode 100644
index c90cf65b79..0000000000
--- a/compat/thrust/system/cuda/detail/assign_value.h
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cuda/detail/execution_policy.h>
-#include <thrust/detail/raw_pointer_cast.h>
-#include <thrust/detail/copy.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-
-
-// XXX WAR an issue with msvc 2005 (cl v14.00) which creates multiply-defined
-//     symbols resulting from assign_value
-#if (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC) && (_MSC_VER <= 1400)
-
-namespace
-{
-
-template<typename DerivedPolicy, typename Pointer1, typename Pointer2>
-inline __host__ __device__
-  void assign_value_msvc2005_war(thrust::cuda::execution_policy<DerivedPolicy> &exec, Pointer1 dst, Pointer2 src)
-{
-  // XXX war nvbugs/881631
-  struct war_nvbugs_881631
-  {
-    __host__ inline static void host_path(thrust::cuda::execution_policy<DerivedPolicy> &exec, Pointer1 dst, Pointer2 src)
-    {
-      thrust::copy(exec, src, src + 1, dst);
-    }
-
-    __device__ inline static void device_path(thrust::cuda::execution_policy<DerivedPolicy> &, Pointer1 dst, Pointer2 src)
-    {
-      *thrust::raw_pointer_cast(dst) = *thrust::raw_pointer_cast(src);
-    }
-  };
-
-#ifndef __CUDA_ARCH__
-  war_nvbugs_881631::host_path(exec,dst,src);
-#else
-  war_nvbugs_881631::device_path(exec,dst,src);
-#endif // __CUDA_ARCH__
-} // end assign_value_msvc2005_war()
-
-} // end anon namespace
-
-template<typename DerivedPolicy, typename Pointer1, typename Pointer2>
-inline __host__ __device__
-  void assign_value(thrust::cuda::execution_policy<DerivedPolicy> &exec, Pointer1 dst, Pointer2 src)
-{
-  return assign_value_msvc2005_war(exec,dst,src);
-} // end assign_value()
-
-#else
-
-template<typename DerivedPolicy, typename Pointer1, typename Pointer2>
-inline __host__ __device__
-  void assign_value(thrust::cuda::execution_policy<DerivedPolicy> &exec, Pointer1 dst, Pointer2 src)
-{
-  // XXX war nvbugs/881631
-  struct war_nvbugs_881631
-  {
-    __host__ inline static void host_path(thrust::cuda::execution_policy<DerivedPolicy> &exec, Pointer1 dst, Pointer2 src)
-    {
-      thrust::copy(exec, src, src + 1, dst);
-    }
-
-    __device__ inline static void device_path(thrust::cuda::execution_policy<DerivedPolicy> &, Pointer1 dst, Pointer2 src)
-    {
-      *thrust::raw_pointer_cast(dst) = *thrust::raw_pointer_cast(src);
-    }
-  };
-
-#ifndef __CUDA_ARCH__
-  war_nvbugs_881631::host_path(exec,dst,src);
-#else
-  war_nvbugs_881631::device_path(exec,dst,src);
-#endif // __CUDA_ARCH__
-} // end assign_value()
-
-#endif // msvc 2005 WAR
-
-
-// XXX WAR an issue with msvc 2005 (cl v14.00) which creates multiply-defined
-//     symbols resulting from assign_value
-#if (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC) && (_MSC_VER <= 1400)
-
-namespace
-{
-
-
-template<typename System1, typename System2, typename Pointer1, typename Pointer2>
-inline __host__ __device__
-  void assign_value_msvc2005_war(cross_system<System1,System2> &systems, Pointer1 dst, Pointer2 src)
-{
-  // XXX war nvbugs/881631
-  struct war_nvbugs_881631
-  {
-    __host__ inline static void host_path(cross_system<System1,System2> &systems, Pointer1 dst, Pointer2 src)
-    {
-      // rotate the systems so that they are ordered the same as (src, dst)
-      // for the call to thrust::copy
-      cross_system<System2,System1> rotated_systems = systems.rotate();
-      thrust::copy(rotated_systems, src, src + 1, dst);
-    }
-
-    __device__ inline static void device_path(cross_system<System1,System2> &systems, Pointer1 dst, Pointer2 src)
-    {
-      // XXX forward the true cuda::execution_policy inside systems here
-      //     instead of materializing a tag
-      thrust::cuda::tag cuda_tag;
-      thrust::system::cuda::detail::assign_value(cuda_tag, dst, src);
-    }
-  };
-
-#if __CUDA_ARCH__
-  war_nvbugs_881631::device_path(systems,dst,src);
-#else
-  war_nvbugs_881631::host_path(systems,dst,src);
-#endif
-} // end assign_value_msvc2005_war
-
-
-} // end anon namespace
-
-
-template<typename System1, typename System2, typename Pointer1, typename Pointer2>
-inline __host__ __device__
-  void assign_value(cross_system<System1,System2> &systems, Pointer1 dst, Pointer2 src)
-{
-  return assign_value_msvc2005_war(systems,dst,src);
-} // end assign_value()
-
-
-#else
-
-
-template<typename System1, typename System2, typename Pointer1, typename Pointer2>
-inline __host__ __device__
-  void assign_value(cross_system<System1,System2> &systems, Pointer1 dst, Pointer2 src)
-{
-  // XXX war nvbugs/881631
-  struct war_nvbugs_881631
-  {
-    __host__ inline static void host_path(cross_system<System1,System2> &systems, Pointer1 dst, Pointer2 src)
-    {
-      // rotate the systems so that they are ordered the same as (src, dst)
-      // for the call to thrust::copy
-      cross_system<System2,System1> rotated_systems = systems.rotate();
-      thrust::copy(rotated_systems, src, src + 1, dst);
-    }
-
-    __device__ inline static void device_path(cross_system<System1,System2> &systems, Pointer1 dst, Pointer2 src)
-    {
-      // XXX forward the true cuda::execution_policy inside systems here
-      //     instead of materializing a tag
-      thrust::cuda::tag cuda_tag;
-      thrust::system::cuda::detail::assign_value(cuda_tag, dst, src);
-    }
-  };
-
-#if __CUDA_ARCH__
-  war_nvbugs_881631::device_path(systems,dst,src);
-#else
-  war_nvbugs_881631::host_path(systems,dst,src);
-#endif
-} // end assign_value()
-
-
-#endif // msvc 2005 WAR
-
-  
-} // end detail
-} // end cuda
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/cuda/detail/binary_search.h b/compat/thrust/system/cuda/detail/binary_search.h
deleted file mode 100644
index a307fc5fc8..0000000000
--- a/compat/thrust/system/cuda/detail/binary_search.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system has no special version of this algorithm 
-
diff --git a/compat/thrust/system/cuda/detail/block/copy.h b/compat/thrust/system/cuda/detail/block/copy.h
deleted file mode 100644
index 9cc786bfcf..0000000000
--- a/compat/thrust/system/cuda/detail/block/copy.h
+++ /dev/null
@@ -1,223 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-/*! \file copy.h
- *  \brief CUDA implementation of device-to-device copy,
- *         based on Gregory Diamos' memcpy code.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-#include <thrust/pair.h>
-
-#include <thrust/detail/type_traits.h>
-#include <thrust/detail/dispatch/is_trivial_copy.h>
-#include <thrust/detail/raw_reference_cast.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-namespace block
-{
-
-namespace trivial_copy_detail
-{
-
-
-template<typename Size>
-  inline __device__ thrust::pair<Size,Size> quotient_and_remainder(Size n, Size d)
-{
-  Size quotient  = n / d;
-  Size remainder = n - d * quotient; 
-  return thrust::make_pair(quotient,remainder);
-} // end quotient_and_remainder()
-
-
-// assumes the addresses dst & src are aligned to T boundaries
-template<typename Context,
-         typename T>
-__device__ __thrust_forceinline__
-void aligned_copy(Context context, T *dst, const T *src, unsigned int num_elements)
-{
-  for(unsigned int i = context.thread_index();
-      i < num_elements;
-      i += context.block_dimension())
-  {
-    dst[i] = src[i];
-  }
-} // end aligned_copy()
-
-
-} // end namespace trivial_copy_detail
-
-
-template <typename Context>
-__device__ __thrust_forceinline__
-void trivial_copy(Context context, void* destination_, const void* source_, size_t num_bytes)
-{
-  // reinterpret at bytes
-  char* destination  = reinterpret_cast<char*>(destination_);
-  const char* source = reinterpret_cast<const char*>(source_);
- 
-  // TODO replace this with uint64
-#if THRUST_DEVICE_COMPILER != THRUST_DEVICE_COMPILER_NVCC
-  typedef long long  int2;
-  typedef long long uint2;
-#endif // THRUST_DEVICE_COMPILER_NVCC
-
-  // check alignment
-  // XXX can we do this in three steps?
-  //     1. copy until alignment is met
-  //     2. go hog wild
-  //     3. get the remainder
-  if(reinterpret_cast<size_t>(destination) % sizeof(uint2) != 0 || reinterpret_cast<size_t>(source) % sizeof(uint2) != 0)
-  {
-    for(unsigned int i = context.thread_index(); i < num_bytes; i += context.block_dimension())
-    {
-      destination[i] = source[i];
-    }
-  }
-  else
-  {
-    // it's aligned; do a wide copy
-
-    // this pair stores the number of int2s in the aligned portion of the arrays
-    // and the number of bytes in the remainder
-    const thrust::pair<size_t,size_t> num_wide_elements_and_remainder_bytes = trivial_copy_detail::quotient_and_remainder(num_bytes, sizeof(int2));
-
-    // copy int2 elements
-    trivial_copy_detail::aligned_copy(context,
-                                      reinterpret_cast<int2*>(destination),
-                                      reinterpret_cast<const int2*>(source),
-                                      num_wide_elements_and_remainder_bytes.first);
-
-    // XXX we could copy int elements here
-
-    // copy remainder byte by byte
-
-    // to find the beginning of the remainder arrays, we need to point at the beginning, and then skip the number of bytes in the aligned portion
-    // this is sizeof(int2) times the number of int2s comprising the aligned portion
-    const char *remainder_first  = reinterpret_cast<const char*>(source + sizeof(int2) * num_wide_elements_and_remainder_bytes.first);
-          char *remainder_result = reinterpret_cast<char*>(destination  + sizeof(int2) * num_wide_elements_and_remainder_bytes.first);
-
-    trivial_copy_detail::aligned_copy(context, remainder_result, remainder_first, num_wide_elements_and_remainder_bytes.second);
-  }
-} // end trivial_copy()
-
-
-namespace detail
-{
-namespace dispatch
-{
-
-template<typename Context,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2>
-  __thrust_forceinline__ __device__
-  RandomAccessIterator2 copy(Context context,
-                             RandomAccessIterator1 first,
-                             RandomAccessIterator1 last,
-                             RandomAccessIterator2 result,
-                             thrust::detail::true_type is_trivial_copy)
-{
-  typedef typename thrust::iterator_value<RandomAccessIterator1>::type T;
-
-  const T *src = &thrust::raw_reference_cast(*first);
-        T *dst = &thrust::raw_reference_cast(*result);
-
-  size_t n = (last - first);
-  thrust::system::cuda::detail::block::trivial_copy(context, dst, src, n * sizeof(T));
-  return result + n;
-} // end copy()
-
-template<typename Context,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2>
-  __thrust_forceinline__ __device__
-  RandomAccessIterator2 copy(Context context, 
-                             RandomAccessIterator1 first,
-                             RandomAccessIterator1 last,
-                             RandomAccessIterator2 result,
-                             thrust::detail::false_type is_trivial_copy)
-{
-  RandomAccessIterator2 end_of_output = result + (last - first);
-  
-  // advance iterators
-  first  += context.thread_index();
-  result += context.thread_index();
-
-  for(;
-      first < last;
-      first  += context.block_dimension(),
-      result += context.block_dimension())
-  {
-    *result = *first;
-  } // end for
-
-  return end_of_output;
-} // end copy()
-
-} // end namespace dispatch
-} // end namespace detail
-
-template<typename Context, 
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2>
-  __thrust_forceinline__ __device__
-  RandomAccessIterator2 copy(Context context,
-                             RandomAccessIterator1 first,
-                             RandomAccessIterator1 last,
-                             RandomAccessIterator2 result)
-{
-  return detail::dispatch::copy(context, first, last, result,
-#if __CUDA_ARCH__ < 200
-      // does not work reliably on pre-Fermi due to "Warning: ... assuming global memory space" issues
-      thrust::detail::false_type()
-#else
-      typename thrust::detail::dispatch::is_trivial_copy<RandomAccessIterator1,RandomAccessIterator2>::type()
-#endif
-      );
-} // end copy()
-
-
-template<typename Context, typename RandomAccessIterator1, typename Size, typename RandomAccessIterator2>
-inline __device__
-RandomAccessIterator2 copy_n(Context &ctx, RandomAccessIterator1 first, Size n, RandomAccessIterator2 result)
-{
-  for(Size i = ctx.thread_index(); i < n; i += ctx.block_dimension())
-  {
-    result[i] = first[i];
-  }
-
-  ctx.barrier();
-
-  return result + n;
-}
-
-
-} // end namespace block
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cuda/detail/block/exclusive_scan.h b/compat/thrust/system/cuda/detail/block/exclusive_scan.h
deleted file mode 100644
index 580a7578bf..0000000000
--- a/compat/thrust/system/cuda/detail/block/exclusive_scan.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/functional.h>
-#include <thrust/system/cuda/detail/block/inclusive_scan.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-namespace block
-{
-
-
-template<typename Context, typename RandomAccessIterator, typename T, typename BinaryFunction>
-inline __device__
-typename thrust::iterator_value<RandomAccessIterator>::type
-  inplace_exclusive_scan(Context &ctx, RandomAccessIterator first, T init, BinaryFunction op)
-{
-  // perform an inclusive scan, then shift right
-  block::inplace_inclusive_scan(ctx, first, op);
-
-  typename thrust::iterator_value<RandomAccessIterator>::type carry = first[ctx.block_dimension() - 1];
-
-  ctx.barrier();
-
-  typename thrust::iterator_value<RandomAccessIterator>::type left = (ctx.thread_index() == 0) ? init : first[ctx.thread_index() - 1];
-
-  ctx.barrier();
-
-  first[ctx.thread_index()] = left;
-
-  ctx.barrier();
-
-  return carry;
-}
-
-
-template<typename Context, typename Iterator, typename T>
-inline __device__
-  typename thrust::iterator_value<Iterator>::type
-    inplace_exclusive_scan(Context &ctx, Iterator first, T init)
-{
-  return block::inplace_exclusive_scan(ctx, first, init, thrust::plus<typename thrust::iterator_value<Iterator>::type>());
-}
-
-
-} // end namespace block
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cuda/detail/block/inclusive_scan.h b/compat/thrust/system/cuda/detail/block/inclusive_scan.h
deleted file mode 100644
index 012f7cd2f7..0000000000
--- a/compat/thrust/system/cuda/detail/block/inclusive_scan.h
+++ /dev/null
@@ -1,191 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/iterator/iterator_traits.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-namespace block
-{
-
-template<typename Context,
-         typename InputIterator,
-         typename BinaryFunction>
-__device__ __thrust_forceinline__
-void inclusive_scan(Context context,
-                    InputIterator first,
-                    BinaryFunction binary_op)
-{
-  // TODO generalize to arbitrary n
-  // TODO support dynamic block_size
-  const unsigned int block_size = Context::ThreadsPerBlock::value;
-
-  typename thrust::iterator_value<InputIterator>::type val = first[context.thread_index()];
-
-  if(block_size >    1) { if (context.thread_index() >=    1) { val = binary_op(first[context.thread_index() -    1], val); } context.barrier(); first[context.thread_index()] = val; context.barrier(); }
-  if(block_size >    2) { if (context.thread_index() >=    2) { val = binary_op(first[context.thread_index() -    2], val); } context.barrier(); first[context.thread_index()] = val; context.barrier(); } 
-  if(block_size >    4) { if (context.thread_index() >=    4) { val = binary_op(first[context.thread_index() -    4], val); } context.barrier(); first[context.thread_index()] = val; context.barrier(); }
-  if(block_size >    8) { if (context.thread_index() >=    8) { val = binary_op(first[context.thread_index() -    8], val); } context.barrier(); first[context.thread_index()] = val; context.barrier(); }
-  if(block_size >   16) { if (context.thread_index() >=   16) { val = binary_op(first[context.thread_index() -   16], val); } context.barrier(); first[context.thread_index()] = val; context.barrier(); }
-  if(block_size >   32) { if (context.thread_index() >=   32) { val = binary_op(first[context.thread_index() -   32], val); } context.barrier(); first[context.thread_index()] = val; context.barrier(); }
-  if(block_size >   64) { if (context.thread_index() >=   64) { val = binary_op(first[context.thread_index() -   64], val); } context.barrier(); first[context.thread_index()] = val; context.barrier(); }
-  if(block_size >  128) { if (context.thread_index() >=  128) { val = binary_op(first[context.thread_index() -  128], val); } context.barrier(); first[context.thread_index()] = val; context.barrier(); }
-  if(block_size >  256) { if (context.thread_index() >=  256) { val = binary_op(first[context.thread_index() -  256], val); } context.barrier(); first[context.thread_index()] = val; context.barrier(); }
-  if(block_size >  512) { if (context.thread_index() >=  512) { val = binary_op(first[context.thread_index() -  512], val); } context.barrier(); first[context.thread_index()] = val; context.barrier(); }
-  if(block_size > 1024) { if (context.thread_index() >= 1024) { val = binary_op(first[context.thread_index() - 1024], val); } context.barrier(); first[context.thread_index()] = val; context.barrier(); }
-} // end inclusive_scan()
-
-
-template<typename Context,
-         typename InputIterator,
-         typename Size,
-         typename BinaryFunction>
-__device__ __thrust_forceinline__
-void inclusive_scan_n(Context context,
-                      InputIterator first,
-                      Size n,
-                      BinaryFunction binary_op)
-{
-  // TODO support n > context.block_dimension()
-  typename thrust::iterator_value<InputIterator>::type val = first[context.thread_index()];
-
-  for (unsigned int i = 1; i < n; i <<= 1)
-  {
-    if (context.thread_index() < n && context.thread_index() >= i)
-      val = binary_op(first[context.thread_index() - i], val);
-
-    context.barrier();
-    
-    first[context.thread_index()] = val;
-    
-    context.barrier();
-  }
-} // end inclusive_scan()
-
-
-template<typename Context,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename BinaryFunction>
-__device__ __thrust_forceinline__
-void inclusive_scan_by_flag(Context context,
-                            InputIterator1 first1,
-                            InputIterator2 first2,
-                            BinaryFunction binary_op)
-{
-  // TODO generalize to arbitrary n
-  // TODO support dynamic block_size
-  const unsigned int block_size = Context::ThreadsPerBlock::value;
-
-  typename thrust::iterator_value<InputIterator1>::type flg = first1[context.thread_index()];
-  typename thrust::iterator_value<InputIterator2>::type val = first2[context.thread_index()];
-
-  if(block_size >    1) { if (context.thread_index() >=    1) { if (!flg) { flg |= first1[context.thread_index() -    1]; val = binary_op(first2[context.thread_index() -    1], val); } } context.barrier(); first1[context.thread_index()] = flg; first2[context.thread_index()] = val; context.barrier(); }
-  if(block_size >    2) { if (context.thread_index() >=    2) { if (!flg) { flg |= first1[context.thread_index() -    2]; val = binary_op(first2[context.thread_index() -    2], val); } } context.barrier(); first1[context.thread_index()] = flg; first2[context.thread_index()] = val; context.barrier(); } 
-  if(block_size >    4) { if (context.thread_index() >=    4) { if (!flg) { flg |= first1[context.thread_index() -    4]; val = binary_op(first2[context.thread_index() -    4], val); } } context.barrier(); first1[context.thread_index()] = flg; first2[context.thread_index()] = val; context.barrier(); }
-  if(block_size >    8) { if (context.thread_index() >=    8) { if (!flg) { flg |= first1[context.thread_index() -    8]; val = binary_op(first2[context.thread_index() -    8], val); } } context.barrier(); first1[context.thread_index()] = flg; first2[context.thread_index()] = val; context.barrier(); }
-  if(block_size >   16) { if (context.thread_index() >=   16) { if (!flg) { flg |= first1[context.thread_index() -   16]; val = binary_op(first2[context.thread_index() -   16], val); } } context.barrier(); first1[context.thread_index()] = flg; first2[context.thread_index()] = val; context.barrier(); }
-  if(block_size >   32) { if (context.thread_index() >=   32) { if (!flg) { flg |= first1[context.thread_index() -   32]; val = binary_op(first2[context.thread_index() -   32], val); } } context.barrier(); first1[context.thread_index()] = flg; first2[context.thread_index()] = val; context.barrier(); }
-  if(block_size >   64) { if (context.thread_index() >=   64) { if (!flg) { flg |= first1[context.thread_index() -   64]; val = binary_op(first2[context.thread_index() -   64], val); } } context.barrier(); first1[context.thread_index()] = flg; first2[context.thread_index()] = val; context.barrier(); }
-  if(block_size >  128) { if (context.thread_index() >=  128) { if (!flg) { flg |= first1[context.thread_index() -  128]; val = binary_op(first2[context.thread_index() -  128], val); } } context.barrier(); first1[context.thread_index()] = flg; first2[context.thread_index()] = val; context.barrier(); }
-  if(block_size >  256) { if (context.thread_index() >=  256) { if (!flg) { flg |= first1[context.thread_index() -  256]; val = binary_op(first2[context.thread_index() -  256], val); } } context.barrier(); first1[context.thread_index()] = flg; first2[context.thread_index()] = val; context.barrier(); }
-  if(block_size >  512) { if (context.thread_index() >=  512) { if (!flg) { flg |= first1[context.thread_index() -  512]; val = binary_op(first2[context.thread_index() -  512], val); } } context.barrier(); first1[context.thread_index()] = flg; first2[context.thread_index()] = val; context.barrier(); }
-  if(block_size > 1024) { if (context.thread_index() >= 1024) { if (!flg) { flg |= first1[context.thread_index() - 1024]; val = binary_op(first2[context.thread_index() - 1024], val); } } context.barrier(); first1[context.thread_index()] = flg; first2[context.thread_index()] = val; context.barrier(); }
-} // end inclusive_scan_by_flag()
-
-
-template<typename Context,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename Size,
-         typename BinaryFunction>
-__device__ __thrust_forceinline__
-void inclusive_scan_by_flag_n(Context context,
-                              InputIterator1 first1,
-                              InputIterator2 first2,
-                              Size n,
-                              BinaryFunction binary_op)
-{
-  // TODO support n > context.block_dimension()
-  typename thrust::iterator_value<InputIterator1>::type flg = first1[context.thread_index()];
-  typename thrust::iterator_value<InputIterator2>::type val = first2[context.thread_index()];
-  
-  for (unsigned int i = 1; i < n; i <<= 1)
-  {
-    if (context.thread_index() < n && context.thread_index() >= i) 
-    {
-      if (!flg)
-      { 
-        flg |= first1[context.thread_index() - i];
-        val  = binary_op(first2[context.thread_index() - i], val);
-      }
-    }
-
-    context.barrier();
-    
-    first1[context.thread_index()] = flg;
-    first2[context.thread_index()] = val;
-    
-    context.barrier();
-  }
-} // end inclusive_scan_by_flag()
-
-
-template<typename Context, typename RandomAccessIterator, typename BinaryFunction>
-__device__ __thrust_forceinline__
-void inplace_inclusive_scan(Context &ctx, RandomAccessIterator first, BinaryFunction op)
-{
-  typename thrust::iterator_value<RandomAccessIterator>::type x = first[ctx.thread_index()];
-
-  for(unsigned int offset = 1; offset < ctx.block_dimension(); offset *= 2)
-  {
-    if(ctx.thread_index() >= offset)
-    {
-      x = op(first[ctx.thread_index() - offset], x);
-    }
-
-    ctx.barrier();
-
-    first[ctx.thread_index()] = x;
-
-    ctx.barrier();
-  }
-}
-
-
-template<typename Context, typename RandomAccessIterator>
-__device__ __thrust_forceinline__
-void inplace_inclusive_scan(Context &ctx, RandomAccessIterator first)
-{
-  block::inplace_inclusive_scan(ctx, first, thrust::plus<typename thrust::iterator_value<RandomAccessIterator>::type>());
-}
-
-
-} // end namespace block
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cuda/detail/block/merge.h b/compat/thrust/system/cuda/detail/block/merge.h
deleted file mode 100644
index 9af0b7bfbb..0000000000
--- a/compat/thrust/system/cuda/detail/block/merge.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-namespace block
-{
-
-template<typename Context,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename RandomAccessIterator3,
-         typename StrictWeakOrdering>
-__device__ __thrust_forceinline__
-  RandomAccessIterator3 merge(Context context,
-                              RandomAccessIterator1 first1,
-                              RandomAccessIterator1 last1,
-                              RandomAccessIterator2 first2,
-                              RandomAccessIterator2 last2,
-                              RandomAccessIterator3 result,
-                              StrictWeakOrdering comp);
-
-// XXX assumes that context.block_dimension() <= n1 and
-//                  context.block_dimension() <= n2
-// This algorithm is analogous to inplace_merge
-// but instead of working on the ranges
-// [first, middle) and [middle, last)
-// it works on the ranges
-// [first, first + n1) and [first + n1, first + n1 + n2)
-template<typename Context,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename Size1,
-         typename Size2,
-         typename StrictWeakOrdering>
-__device__ __thrust_forceinline__
-  void inplace_merge_by_key_n(Context context,
-                              RandomAccessIterator1 keys_first,
-                              RandomAccessIterator2 values_first,
-                              Size1 n1,
-                              Size2 n2,
-                              StrictWeakOrdering comp);
-
-} // end namespace block
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/cuda/detail/block/merge.inl>
-
diff --git a/compat/thrust/system/cuda/detail/block/merge.inl b/compat/thrust/system/cuda/detail/block/merge.inl
deleted file mode 100644
index 5eae2b58f3..0000000000
--- a/compat/thrust/system/cuda/detail/block/merge.inl
+++ /dev/null
@@ -1,168 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/detail/raw_reference_cast.h>
-#include <thrust/system/detail/generic/scalar/binary_search.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-namespace block
-{
-
-template<typename Context,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename RandomAccessIterator3,
-         typename StrictWeakOrdering>
-__device__ __thrust_forceinline__
-  RandomAccessIterator3 merge(Context context,
-                              RandomAccessIterator1 first1,
-                              RandomAccessIterator1 last1,
-                              RandomAccessIterator2 first2,
-                              RandomAccessIterator2 last2,
-                              RandomAccessIterator3 result,
-                              StrictWeakOrdering comp)
-{
-  typedef typename thrust::iterator_difference<RandomAccessIterator1>::type difference1;
-  typedef typename thrust::iterator_difference<RandomAccessIterator2>::type difference2;
-
-  difference1 n1 = last1 - first1;
-  difference2 n2 = last2 - first2;
-
-  // find the rank of each element in the other array
-  difference2 rank2 = 0;
-  if(context.thread_index() < n1)
-  {
-    RandomAccessIterator1 x = first1;
-    x += context.thread_index();
-
-    // lower_bound ensures that x sorts before any equivalent element of input2
-    // this ensures stability
-    rank2 = thrust::system::detail::generic::scalar::lower_bound(first2, last2, raw_reference_cast(*x), comp) - first2;
-  } // end if
-
-  difference1 rank1 = 0;
-  if(context.thread_index() < n2)
-  {
-    RandomAccessIterator2 x = first2 + context.thread_index();
-
-    // upper_bound ensures that x sorts before any equivalent element of input1
-    // this ensures stability
-    rank1 = thrust::system::detail::generic::scalar::upper_bound(first1, last1, raw_reference_cast(*x), comp) - first1;
-  } // end if
-
-  if(context.thread_index() < n1)
-  {
-    // scatter each element from input1
-    RandomAccessIterator1 src = first1 + context.thread_index();
-    RandomAccessIterator3 dst = result + context.thread_index() + rank2;
-
-    *dst = *src;
-  }
-
-  if(context.thread_index() < n2)
-  {
-    // scatter each element from input2
-    RandomAccessIterator2 src = first2 + context.thread_index();
-    RandomAccessIterator3 dst = result + context.thread_index() + rank1;
-
-    *dst = *src;
-  }
-
-  return result + n1 + n2;
-} // end merge
-
-
-template<typename Context,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename Size1,
-         typename Size2,
-         typename StrictWeakOrdering>
-__device__ __thrust_forceinline__
-  void inplace_merge_by_key_n(Context context,
-                              RandomAccessIterator1 keys_first,
-                              RandomAccessIterator2 values_first,
-                              Size1 n1,
-                              Size2 n2,
-                              StrictWeakOrdering comp)
-{
-  RandomAccessIterator1 input1 = keys_first;
-  RandomAccessIterator1 input2 = keys_first + n1;
-
-  RandomAccessIterator2 input1val = values_first;
-  RandomAccessIterator2 input2val = values_first + n1;
-  
-  typedef typename thrust::iterator_value<RandomAccessIterator1>::type KeyType;
-  typedef typename thrust::iterator_value<RandomAccessIterator2>::type ValueType;
-
-  // XXX use uninitialized here
-  KeyType inp1 = input1[context.thread_index()]; ValueType inp1val = input1val[context.thread_index()];
-  KeyType inp2 = input2[context.thread_index()]; ValueType inp2val = input2val[context.thread_index()];
-  
-  // to merge input1 and input2, use binary search to find the rank of inp1 & inp2 in arrays input2 & input1, respectively
-  // as before, the "end" variables point to one element after the last element of the arrays
-  
-  // start by looking through input2 for inp1's rank
-  unsigned int start_1 = 0;
-  
-  // don't do the search if our value is beyond the end of input1
-  if(context.thread_index() < n1)
-  {
-    start_1 = thrust::system::detail::generic::scalar::lower_bound_n(input2, n2, inp1, comp) - input2;
-  } // end if
-  
-  // now look through input1 for inp2's rank
-  unsigned int start_2 = 0;
-  
-  // don't do the search if our value is beyond the end of input2
-  if(context.thread_index() < n2)
-  {
-    // upper_bound ensures that equivalent elements in the first range sort before the second
-    start_2 = thrust::system::detail::generic::scalar::upper_bound_n(input1, n1, inp2, comp) - input1;
-  } // end if
-
-  context.barrier();
-  
-  // Write back into the right position to the input arrays; can be done in place since we read in
-  // the input arrays into registers before.
-  if(context.thread_index() < n1)
-  {
-    input1[start_1 + context.thread_index()] = inp1;
-    input1val[start_1 + context.thread_index()] = inp1val;
-  } // end if
-  
-  if(context.thread_index() < n2)
-  {
-    input1[start_2 + context.thread_index()] = inp2;
-    input1val[start_2 + context.thread_index()] = inp2val;
-  } // end if
-} // end inplace_merge_by_key_n()
-
-
-} // end namespace block
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cuda/detail/block/merging_sort.h b/compat/thrust/system/cuda/detail/block/merging_sort.h
deleted file mode 100644
index 8f8f999ec5..0000000000
--- a/compat/thrust/system/cuda/detail/block/merging_sort.h
+++ /dev/null
@@ -1,199 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file merging_sort.h
- *  \brief Block version of merge sort
- */
-
-#pragma once
-
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/system/detail/generic/scalar/binary_search.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-namespace block
-{
-
-
-template<typename RandomAccessIterator1, typename RandomAccessIterator2, typename Compare>
-__device__ void conditional_swap(RandomAccessIterator1 keys_first,
-                                 RandomAccessIterator2 values_first,
-                                 const unsigned int i,
-                                 const unsigned int end,
-                                 bool pred,
-                                 Compare comp)
-{
-  typedef typename thrust::iterator_traits<RandomAccessIterator1>::value_type KeyType;
-  typedef typename thrust::iterator_traits<RandomAccessIterator2>::value_type ValueType;
-
-  if(pred && i+1<end)
-  {
-    KeyType xi = keys_first[i];
-    KeyType xj = keys_first[i+1];
-
-    // swap if xj sorts before xi
-    if(comp(xj, xi))
-    {
-      // XXX this implementation should really dispatch swap via ADL
-      ValueType yi;
-      yi = values_first[i];
-      ValueType yj;
-      yj = values_first[i+1];
-
-      keys_first[i]     = xj;
-      keys_first[i+1]   = xi;
-      values_first[i]   = yj;
-      values_first[i+1] = yi;
-    }
-  }
-}
-
-
-template<typename Context,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename Compare>
-__device__ void transposition_sort(Context context,
-                                   RandomAccessIterator1 keys_first,
-                                   RandomAccessIterator2 values_first,
-                                   const unsigned int i,
-                                   const unsigned int end,
-                                   const unsigned int size,
-                                   Compare comp)
-{
-  const bool is_odd = i&0x1;
-  
-  for(unsigned int round=size/2; round>0; --round)
-  {
-    // ODDS
-    conditional_swap(keys_first, values_first, i, end, is_odd, comp);
-    context.barrier();
-  
-    // EVENS
-    conditional_swap(keys_first, values_first, i, end, !is_odd, comp);
-    context.barrier();
-  }
-}
-
-template<typename Context,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename StrictWeakOrdering>
-__device__ void merge(Context context,
-                      RandomAccessIterator1 keys_first, 
-                      RandomAccessIterator2 values_first,
-                      const unsigned int i,
-                      const unsigned int n,
-                      unsigned int begin,
-                      unsigned int end,
-                      unsigned int h,
-                      StrictWeakOrdering cmp)
-{
-  // INVARIANT: Every element i resides within a sequence [begin,end)
-  //            of length h which is already sorted
-  while( h<n )
-  {
-    h *= 2;
-
-    unsigned int new_begin = i&(~(h-1));
-    unsigned int new_end   = min(n,new_begin+h);
-
-    typedef typename thrust::iterator_traits<RandomAccessIterator1>::value_type KeyType;
-    typedef typename thrust::iterator_traits<RandomAccessIterator2>::value_type ValueType;
-
-    KeyType key;
-    ValueType value;
-
-    unsigned int rank = i - begin;
-
-    // prevent out-of-bounds access
-    if(i < new_end)
-    {
-      key = keys_first[i];
-
-      if(begin==new_begin)  // in the left side of merging pair
-      {
-        RandomAccessIterator1 result = thrust::system::detail::generic::scalar::lower_bound_n(keys_first+end, new_end-end, key, cmp);
-        rank += (result - (keys_first+end));
-      }
-      else                  // in the right side of merging pair
-      {
-        RandomAccessIterator1 result = thrust::system::detail::generic::scalar::upper_bound_n(keys_first+new_begin, begin-new_begin, key, cmp);
-        rank += (result - (keys_first+new_begin));
-      }
-
-      value = values_first[i];
-    }
-
-    context.barrier();
-
-    if(i < new_end)
-    {
-      keys_first[new_begin+rank] = key;
-      values_first[new_begin+rank] = value;
-    }
-    
-    context.barrier();
-
-    begin = new_begin;
-    end   = new_end;
-  }
-}
-
-
-/*! Block-wise implementation of merge sort.
- *  It provides the same external interface as odd_even_sort.
- */
-template<typename Context,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename StrictWeakOrdering>
-__device__ void merging_sort(Context context,
-                             RandomAccessIterator1 keys_first,
-                             RandomAccessIterator2 values_first,
-                             const unsigned int n,
-                             StrictWeakOrdering comp)
-{
-  // Phase 1: Sort subsequences of length 32 using odd-even
-  //          transposition sort.  The code below assumes that h is a
-  //          power of 2.  Empirically, 32 delivers best results,
-  //          which is not surprising since that's the warp width.
-  unsigned int i = context.thread_index();
-  unsigned int h = 32;
-  unsigned int begin=i&(~(h-1)),  end=min(n,begin+h);
-  
-  transposition_sort(context, keys_first, values_first, i, end, h, comp);
-  
-  // Phase 2: Apply merge tree to produce final sorted results
-  merge(context, keys_first, values_first, i, n, begin, end, h, comp);
-} // end merging_sort()
-
-
-} // end namespace block
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cuda/detail/block/odd_even_sort.h b/compat/thrust/system/cuda/detail/block/odd_even_sort.h
deleted file mode 100644
index 0fa0ea069a..0000000000
--- a/compat/thrust/system/cuda/detail/block/odd_even_sort.h
+++ /dev/null
@@ -1,151 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file odd_even_sort.h
- *  \brief Block versions of Batcher's Odd-Even Merge Sort
- */
-
-#pragma once
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-namespace block
-{
-
-
-/*! Block-wise implementation of Batcher's Odd-Even Merge Sort
- *  This implementation is based on Nadathur Satish's.
- */
-template<typename KeyType,
-         typename ValueType,
-         typename StrictWeakOrdering>
-  __device__ void odd_even_sort(KeyType *keys,
-                                ValueType *data,
-                                const unsigned int n,
-                                StrictWeakOrdering comp)
-{
-  for(unsigned int p = blockDim.x>>1; p > 0; p >>= 1)
-  {
-    unsigned int q = blockDim.x>>1, r = 0, d = p;
-
-    while(q >= p)
-    {
-      unsigned int j = threadIdx.x + d;
-
-      // if j lies beyond the end of the array, we consider it "sorted" wrt i
-      // regardless of whether i lies beyond the end of the array 
-      if(threadIdx.x < (blockDim.x-d) && (threadIdx.x & p) == r && j < n)
-      {
-        KeyType xikey = keys[threadIdx.x];
-        KeyType xjkey = keys[j];
-
-        ValueType xivalue = data[threadIdx.x];
-        ValueType xjvalue = data[j];
-
-        // does xj sort before xi?
-        if(comp(xjkey, xikey))
-        {
-          keys[threadIdx.x] = xjkey;
-          keys[j] = xikey;
-
-          data[threadIdx.x] = xjvalue;
-          data[j] = xivalue;
-        } // end if
-      } // end if
-
-      d = q - p;
-      q >>= 1;
-      r = p;
-
-      __syncthreads();
-    } // end while
-  } // end for p
-} // end odd_even_sort()
-
-template<typename KeyType,
-         typename ValueType,
-         typename StrictWeakOrdering>
-  __device__ void stable_odd_even_sort(KeyType *keys,
-                                       ValueType *data,
-                                       const unsigned int n,
-                                       StrictWeakOrdering comp)
-{
-  for(unsigned int i = 0;
-      i < blockDim.x>>1;
-      ++i)
-  {
-    bool thread_is_odd = threadIdx.x & 0x1;
-
-    // do odds first
-    if(thread_is_odd && threadIdx.x + 1 < n)
-    {
-      KeyType xikey = keys[threadIdx.x];
-      KeyType xjkey = keys[threadIdx.x + 1];
-
-      ValueType xivalue = data[threadIdx.x];
-      ValueType xjvalue = data[threadIdx.x + 1];
-
-      // does xj sort before xi?
-      if(comp(xjkey, xikey))
-      {
-        keys[threadIdx.x] = xjkey;
-        keys[threadIdx.x + 1] = xikey;
-
-        data[threadIdx.x] = xjvalue;
-        data[threadIdx.x + 1] = xivalue;
-      } // end if
-    } // end if
-
-    __syncthreads();
-
-    // do evens second
-    if(!thread_is_odd && threadIdx.x + 1 < n)
-    {
-      KeyType xikey = keys[threadIdx.x];
-      KeyType xjkey = keys[threadIdx.x + 1];
-
-      ValueType xivalue = data[threadIdx.x];
-      ValueType xjvalue = data[threadIdx.x + 1];
-
-      // does xj sort before xi?
-      if(comp(xjkey, xikey))
-      {
-        keys[threadIdx.x] = xjkey;
-        keys[threadIdx.x + 1] = xikey;
-
-        data[threadIdx.x] = xjvalue;
-        data[threadIdx.x + 1] = xivalue;
-      } // end if
-    } // end if
-
-    __syncthreads();
-  } // end for i
-} // end stable_odd_even_sort()
-
-
-} // end namespace block
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cuda/detail/block/reduce.h b/compat/thrust/system/cuda/detail/block/reduce.h
deleted file mode 100644
index e0a1901b28..0000000000
--- a/compat/thrust/system/cuda/detail/block/reduce.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/iterator/iterator_traits.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-namespace block
-{
-
-/* Reduces [data, data + n) using binary_op and stores the result in data[0]
- *
- * Upon return the elements in [data + 1, data + n) have unspecified values.
- */
-template <typename Context, typename ValueIterator, typename BinaryFunction>
-__device__ __thrust_forceinline__
-void reduce_n(Context context, ValueIterator data, unsigned int n, BinaryFunction binary_op)
-{
-  if (context.block_dimension() < n)
-  {
-    for (unsigned int i = context.block_dimension() + context.thread_index(); i < n; i += context.block_dimension())
-      data[context.thread_index()] = binary_op(data[context.thread_index()], data[i]);
-
-    context.barrier();
-  }
-
-  while (n > 1)
-  {
-    unsigned int half = n / 2;
-
-    if (context.thread_index() < half)
-      data[context.thread_index()] = binary_op(data[context.thread_index()], data[n - context.thread_index() - 1]);
-
-    context.barrier();
-
-    n = n - half;
-  }
-}
-
-} // end namespace block
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cuda/detail/copy.h b/compat/thrust/system/cuda/detail/copy.h
deleted file mode 100644
index 8f7ee97c24..0000000000
--- a/compat/thrust/system/cuda/detail/copy.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cuda/detail/execution_policy.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator>
-  OutputIterator copy(execution_policy<DerivedPolicy> &exec,
-                      InputIterator first,
-                      InputIterator last,
-                      OutputIterator result);
-
-
-template<typename System1,
-         typename System2,
-         typename InputIterator,
-         typename OutputIterator>
-  OutputIterator copy(cross_system<System1,System2> exec,
-                      InputIterator first,
-                      InputIterator last,
-                      OutputIterator result);
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename Size,
-         typename OutputIterator>
-  OutputIterator copy_n(execution_policy<DerivedPolicy> &exec,
-                        InputIterator first,
-                        Size n,
-                        OutputIterator result);
-
-
-template<typename System1,
-         typename System2,
-         typename InputIterator,
-         typename Size,
-         typename OutputIterator>
-  OutputIterator copy_n(cross_system<System1,System2> exec,
-                        InputIterator first,
-                        Size n,
-                        OutputIterator result);
-
-
-} // end detail
-} // end cuda
-} // end system
-} // end thrust
-
-#include <thrust/system/cuda/detail/copy.inl>
-
diff --git a/compat/thrust/system/cuda/detail/copy.inl b/compat/thrust/system/cuda/detail/copy.inl
deleted file mode 100644
index 125eebdaa5..0000000000
--- a/compat/thrust/system/cuda/detail/copy.inl
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cuda/detail/copy.h>
-#include <thrust/system/cuda/detail/copy_device_to_device.h>
-#include <thrust/system/cuda/detail/copy_cross_system.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-
-
-template<typename System,
-         typename InputIterator,
-         typename OutputIterator>
-  OutputIterator copy(execution_policy<System> &system,
-                      InputIterator first,
-                      InputIterator last,
-                      OutputIterator result)
-{
-  return thrust::system::cuda::detail::copy_device_to_device(system,first,last,result);
-} // end copy()
-
-
-template<typename System1,
-         typename System2,
-         typename InputIterator,
-         typename OutputIterator>
-  OutputIterator copy(cross_system<System1,System2> systems,
-                      InputIterator first,
-                      InputIterator last,
-                      OutputIterator result)
-{
-  return thrust::system::cuda::detail::copy_cross_system(systems,first,last,result);
-} // end copy()
-
-
-template<typename System,
-         typename InputIterator,
-         typename Size,
-         typename OutputIterator>
-  OutputIterator copy_n(execution_policy<System> &system,
-                        InputIterator first,
-                        Size n,
-                        OutputIterator result)
-{
-  return thrust::system::cuda::detail::copy_device_to_device(system,first,first+n,result);
-} // end copy_n()
-
-
-template<typename System1,
-         typename System2,
-         typename InputIterator,
-         typename Size,
-         typename OutputIterator>
-  OutputIterator copy_n(cross_system<System1,System2> systems,
-                        InputIterator first,
-                        Size n,
-                        OutputIterator result)
-{
-  return thrust::system::cuda::detail::copy_cross_system_n(systems,first,n,result);
-} // end copy_n()
-
-
-} // end detail
-} // end cuda
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/cuda/detail/copy_cross_system.h b/compat/thrust/system/cuda/detail/copy_cross_system.h
deleted file mode 100644
index f68ea3c88a..0000000000
--- a/compat/thrust/system/cuda/detail/copy_cross_system.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cuda/detail/execution_policy.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-
-
-template<typename System1,
-         typename System2,
-         typename InputIterator,
-         typename OutputIterator>
-  OutputIterator copy_cross_system(cross_system<System1,System2> systems,
-                                   InputIterator begin, 
-                                   InputIterator end, 
-                                   OutputIterator result);
-
-
-template<typename System1,
-         typename System2,
-         typename InputIterator,
-         typename Size,
-         typename OutputIterator>
-  OutputIterator copy_cross_system_n(cross_system<System1,System2> systems,
-                                     InputIterator begin, 
-                                     Size n, 
-                                     OutputIterator result);
-
-
-} // end detail
-} // end cuda
-} // end system
-} // end thrust
-
-#include <thrust/system/cuda/detail/copy_cross_system.inl>
-
diff --git a/compat/thrust/system/cuda/detail/copy_cross_system.inl b/compat/thrust/system/cuda/detail/copy_cross_system.inl
deleted file mode 100644
index 861cb2c2cd..0000000000
--- a/compat/thrust/system/cuda/detail/copy_cross_system.inl
+++ /dev/null
@@ -1,301 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cuda/detail/copy_cross_system.h>
-#include <thrust/detail/copy.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/detail/temporary_array.h>
-#include <thrust/detail/dispatch/is_trivial_copy.h>
-#include <thrust/system/cuda/detail/trivial_copy.h>
-
-namespace thrust
-{
-namespace detail
-{
-
-// XXX WAR circular #inclusion problem
-template<typename,typename> class temporary_array;
-
-} // end detail
-
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-
-
-// general input to random access case
-template<typename System1,
-         typename System2,
-         typename InputIterator,
-         typename RandomAccessIterator>
-  RandomAccessIterator copy_cross_system(cross_system<System1,System2> systems,
-                                         InputIterator begin,
-                                         InputIterator end,
-                                         RandomAccessIterator result,
-                                         thrust::incrementable_traversal_tag, 
-                                         thrust::random_access_traversal_tag)
-{
-  //std::cerr << std::endl;
-  //std::cerr << "general copy_host_to_device(): InputIterator: " << typeid(InputIterator).name() << std::endl;
-  //std::cerr << "general copy_host_to_device(): OutputIterator: " << typeid(OutputIterator).name() << std::endl;
-
-  typedef typename thrust::iterator_value<InputIterator>::type InputType;
-
-  // allocate temporary storage in System1
-  thrust::detail::temporary_array<InputType, System1> temp(systems.system1,begin,end);
-  return thrust::copy(systems, temp.begin(), temp.end(), result);
-}
-
-template<typename System1,
-         typename System2,
-         typename InputIterator,
-         typename Size,
-         typename RandomAccessIterator>
-  RandomAccessIterator copy_cross_system_n(cross_system<System1,System2> systems,
-                                           InputIterator first,
-                                           Size n,
-                                           RandomAccessIterator result,
-                                           thrust::incrementable_traversal_tag, 
-                                           thrust::random_access_traversal_tag)
-{
-  typedef typename thrust::iterator_value<InputIterator>::type InputType;
-
-  // allocate and copy to temporary storage System1
-  thrust::detail::temporary_array<InputType, System1> temp(systems.system1, first, n);
-
-  // recurse
-  return copy_cross_system(systems, temp.begin(), temp.end(), result);
-}
-
-
-// random access to general output case
-template<typename System1,
-         typename System2,
-         typename RandomAccessIterator,
-         typename OutputIterator>
-  OutputIterator copy_cross_system(cross_system<System1,System2> systems,
-                                   RandomAccessIterator begin,
-                                   RandomAccessIterator end,
-                                   OutputIterator result,
-                                   thrust::random_access_traversal_tag, 
-                                   thrust::incrementable_traversal_tag)
-{
-  typedef typename thrust::iterator_value<RandomAccessIterator>::type InputType;
-
-  // copy to temporary storage in System2
-  thrust::detail::temporary_array<InputType,System2> temp(systems.system2, systems.system1, begin, end);
-
-  return thrust::copy(systems.system2, temp.begin(), temp.end(), result);
-}
-
-template<typename System1,
-         typename System2,
-         typename RandomAccessIterator,
-         typename Size,
-         typename OutputIterator>
-  OutputIterator copy_cross_system_n(cross_system<System1,System2> systems,
-                                     RandomAccessIterator first,
-                                     Size n,
-                                     OutputIterator result,
-                                     thrust::random_access_traversal_tag, 
-                                     thrust::incrementable_traversal_tag)
-{
-  typedef typename thrust::iterator_value<RandomAccessIterator>::type InputType;
-
-  // copy to temporary storage in System2
-  thrust::detail::temporary_array<InputType,System2> temp(systems.system2, systems.system1, first, n);
-
-  // copy temp to result
-  return thrust::copy(systems.system2, temp.begin(), temp.end(), result);
-}
-
-
-// trivial copy
-template<typename System1,
-         typename System2,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2>
-  RandomAccessIterator2 copy_cross_system(cross_system<System1,System2> systems,
-                                          RandomAccessIterator1 begin,
-                                          RandomAccessIterator1 end,
-                                          RandomAccessIterator2 result,
-                                          thrust::random_access_traversal_tag,
-                                          thrust::random_access_traversal_tag,
-                                          thrust::detail::true_type) // trivial copy
-{
-//  std::cerr << std::endl;
-//  std::cerr << "random access copy_device_to_host(): trivial" << std::endl;
-//  std::cerr << "general copy_device_to_host(): RandomAccessIterator1: " << typeid(RandomAccessIterator1).name() << std::endl;
-//  std::cerr << "general copy_device_to_host(): RandomAccessIterator2: " << typeid(RandomAccessIterator2).name() << std::endl;
-  
-  // how many elements to copy?
-  typename thrust::iterator_traits<RandomAccessIterator1>::difference_type n = end - begin;
-
-  thrust::system::cuda::detail::trivial_copy_n(systems, begin, n, result);
-
-  return result + n;
-}
-
-
-namespace detail
-{
-
-// random access non-trivial iterator to random access iterator
-template<typename System1,
-         typename System2,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2>
-  RandomAccessIterator2 non_trivial_random_access_copy_cross_system(cross_system<System1,System2> systems,
-                                                                    RandomAccessIterator1 begin,
-                                                                    RandomAccessIterator1 end,
-                                                                    RandomAccessIterator2 result,
-                                                                    thrust::detail::false_type) // InputIterator is non-trivial
-{
-  // copy the input to a temporary input system buffer of OutputType
-  typedef typename thrust::iterator_value<RandomAccessIterator2>::type OutputType;
-
-  // allocate temporary storage in System1
-  thrust::detail::temporary_array<OutputType,System1> temp(systems.system1, begin, end);
-
-  // recurse
-  return copy_cross_system(systems, temp.begin(), temp.end(), result);
-}
-
-template<typename System1,
-         typename System2,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2>
-  RandomAccessIterator2 non_trivial_random_access_copy_cross_system(cross_system<System1,System2> systems,
-                                                                    RandomAccessIterator1 begin,
-                                                                    RandomAccessIterator1 end,
-                                                                    RandomAccessIterator2 result,
-                                                                    thrust::detail::true_type) // InputIterator is trivial
-{
-  typename thrust::iterator_difference<RandomAccessIterator1>::type n = thrust::distance(begin, end);
-
-  // allocate temporary storage in System2
-  // retain the input's type for the intermediate storage
-  // do not initialize the storage (the 0 does this)
-  typedef typename thrust::iterator_value<RandomAccessIterator1>::type InputType;
-  thrust::detail::temporary_array<InputType,System2> temp(0, systems.system2, n);
-
-  // force a trivial (memcpy) copy of the input to the temporary
-  // note that this will not correctly account for copy constructors
-  // but there's nothing we can do about that
-  // XXX one thing we might try is to use pinned memory for the temporary storage
-  //     this might allow us to correctly account for copy constructors
-  thrust::system::cuda::detail::trivial_copy_n(systems, begin, n, temp.begin());
-
-  // finally, copy to the result
-  return thrust::copy(systems.system2, temp.begin(), temp.end(), result);
-}
-
-} // end detail
-
-
-// random access iterator to random access host iterator with non-trivial copy
-template<typename System1,
-         typename System2,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2>
-  RandomAccessIterator2 copy_cross_system(cross_system<System1,System2> systems,
-                                          RandomAccessIterator1 begin,
-                                          RandomAccessIterator1 end,
-                                          RandomAccessIterator2 result,
-                                          thrust::random_access_traversal_tag,
-                                          thrust::random_access_traversal_tag,
-                                          thrust::detail::false_type) // is_trivial_copy
-{
-  // dispatch a non-trivial random access cross system copy based on whether or not the InputIterator is trivial
-  return detail::non_trivial_random_access_copy_cross_system(systems, begin, end, result,
-      typename thrust::detail::is_trivial_iterator<RandomAccessIterator1>::type());
-}
-
-// random access iterator to random access iterator
-template<typename System1,
-         typename System2,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2>
-  RandomAccessIterator2 copy_cross_system(cross_system<System1,System2> systems,
-                                          RandomAccessIterator1 begin,
-                                          RandomAccessIterator1 end,
-                                          RandomAccessIterator2 result,
-                                          thrust::random_access_traversal_tag input_traversal,
-                                          thrust::random_access_traversal_tag output_traversal)
-{
-  // dispatch on whether this is a trivial copy
-  return copy_cross_system(systems, begin, end, result, input_traversal, output_traversal,
-          typename thrust::detail::dispatch::is_trivial_copy<RandomAccessIterator1,RandomAccessIterator2>::type());
-}
-
-template<typename System1,
-         typename System2,
-         typename RandomAccessIterator1,
-         typename Size,
-         typename RandomAccessIterator2>
-  RandomAccessIterator2 copy_cross_system_n(cross_system<System1,System2> systems,
-                                            RandomAccessIterator1 first,
-                                            Size n,
-                                            RandomAccessIterator2 result,
-                                            thrust::random_access_traversal_tag input_traversal,
-                                            thrust::random_access_traversal_tag output_traversal)
-{
-  // implement with copy_cross_system
-  return copy_cross_system(systems, first, first + n, result, input_traversal, output_traversal);
-}
-
-/////////////////
-// Entry Point //
-/////////////////
-
-template<typename System1,
-         typename System2,
-         typename InputIterator,
-         typename OutputIterator>
-  OutputIterator copy_cross_system(cross_system<System1,System2> systems,
-                                   InputIterator begin, 
-                                   InputIterator end, 
-                                   OutputIterator result)
-{
-  return copy_cross_system(systems, begin, end, result, 
-          typename thrust::iterator_traversal<InputIterator>::type(),
-          typename thrust::iterator_traversal<OutputIterator>::type());
-}
-
-template<typename System1,
-         typename System2,
-         typename InputIterator,
-         typename Size,
-         typename OutputIterator>
-  OutputIterator copy_cross_system_n(cross_system<System1,System2> systems,
-                                     InputIterator begin, 
-                                     Size n, 
-                                     OutputIterator result)
-{
-  return copy_cross_system_n(systems, begin, n, result, 
-          typename thrust::iterator_traversal<InputIterator>::type(),
-          typename thrust::iterator_traversal<OutputIterator>::type());
-}
-
-} // end detail
-} // end cuda
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/cuda/detail/copy_device_to_device.h b/compat/thrust/system/cuda/detail/copy_device_to_device.h
deleted file mode 100644
index a7d8df8613..0000000000
--- a/compat/thrust/system/cuda/detail/copy_device_to_device.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file copy_device_to_device.h
- *  \brief Device implementations for copying on the device.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cuda/detail/execution_policy.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator>
-  OutputIterator copy_device_to_device(execution_policy<DerivedPolicy> &exec,
-                                       InputIterator begin, 
-                                       InputIterator end, 
-                                       OutputIterator result);
-
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/cuda/detail/copy_device_to_device.inl>
-
diff --git a/compat/thrust/system/cuda/detail/copy_device_to_device.inl b/compat/thrust/system/cuda/detail/copy_device_to_device.inl
deleted file mode 100644
index c8263c5d55..0000000000
--- a/compat/thrust/system/cuda/detail/copy_device_to_device.inl
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cuda/detail/copy_device_to_device.h>
-#include <thrust/system/cuda/detail/copy_cross_system.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/distance.h>
-#include <thrust/transform.h>
-#include <thrust/functional.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/detail/temporary_array.h>
-#include <thrust/system/cuda/detail/trivial_copy.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-namespace detail
-{
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator>
-  OutputIterator copy_device_to_device(execution_policy<DerivedPolicy> &exec,
-                                       InputIterator begin, 
-                                       InputIterator end, 
-                                       OutputIterator result,
-                                       thrust::detail::false_type)
-{
-    // general case (mixed types)
-    typedef typename thrust::iterator_traits<InputIterator>::value_type InputType;
-
-#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC
-    return thrust::transform(exec, begin, end, result, thrust::identity<InputType>());
-#else
-    // we're not compiling with nvcc: copy [begin, end) to temp host memory
-    typename thrust::iterator_traits<InputIterator>::difference_type n = thrust::distance(begin, end);
-
-    thrust::host_system_tag temp_exec;
-    thrust::detail::temporary_array<InputType, thrust::host_system_tag> temp1(temp_exec, begin, end);
-
-    // transform temp1 to OutputType in host memory
-    typedef typename thrust::iterator_traits<OutputIterator>::value_type OutputType;
-
-    thrust::detail::temporary_array<OutputType, thrust::host_system_tag> temp2(temp_exec, temp1.begin(), temp1.end());
-
-    // copy temp2 to device
-    result = thrust::system::cuda::detail::copy_cross_system(temp2.begin(), temp2.end(), result);
-
-    return result;
-#endif // THRUST_DEVICE_COMPILER_NVCC
-}
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator>
-  OutputIterator copy_device_to_device(execution_policy<DerivedPolicy> &exec,
-                                       InputIterator begin, 
-                                       InputIterator end, 
-                                       OutputIterator result,
-                                       thrust::detail::true_type)
-{
-    // specialization for device to device when the value_types match, operator= is not overloaded,
-    // and the iterators are pointers
-
-    // how many elements to copy?
-    typename thrust::iterator_traits<OutputIterator>::difference_type n = end - begin;
-
-    thrust::system::cuda::detail::trivial_copy_n(exec, begin, n, result);
-
-    return result + n;
-}
-
-} // end namespace detail
-
-/////////////////
-// Entry Point //
-/////////////////
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator>
-  OutputIterator copy_device_to_device(execution_policy<DerivedPolicy> &exec,
-                                       InputIterator begin, 
-                                       InputIterator end, 
-                                       OutputIterator result)
-{
-    typedef typename thrust::iterator_traits<InputIterator>::value_type InputType;
-    typedef typename thrust::iterator_traits<OutputIterator>::value_type OutputType;
-
-    const bool use_trivial_copy = 
-        thrust::detail::is_same<InputType, OutputType>::value
-        && thrust::detail::is_trivial_iterator<InputIterator>::value 
-        && thrust::detail::is_trivial_iterator<OutputIterator>::value;
-
-    // XXX WAR unused variable warning
-    (void) use_trivial_copy;
-
-    return detail::copy_device_to_device(exec, begin, end, result,
-            thrust::detail::integral_constant<bool, use_trivial_copy>());
-
-}
-
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cuda/detail/copy_if.h b/compat/thrust/system/cuda/detail/copy_if.h
deleted file mode 100644
index 5ed0f6c9c4..0000000000
--- a/compat/thrust/system/cuda/detail/copy_if.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cuda/detail/execution_policy.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename Predicate>
-   OutputIterator copy_if(execution_policy<DerivedPolicy> &exec,
-                          InputIterator1 first,
-                          InputIterator1 last,
-                          InputIterator2 stencil,
-                          OutputIterator result,
-                          Predicate pred);
-
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/cuda/detail/copy_if.inl>
-
diff --git a/compat/thrust/system/cuda/detail/copy_if.inl b/compat/thrust/system/cuda/detail/copy_if.inl
deleted file mode 100644
index 15ea7faa82..0000000000
--- a/compat/thrust/system/cuda/detail/copy_if.inl
+++ /dev/null
@@ -1,212 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/iterator/transform_iterator.h>
-#include <thrust/detail/minmax.h>
-#include <thrust/detail/temporary_array.h>
-#include <thrust/detail/internal_functional.h>
-#include <thrust/system/detail/internal/decompose.h>
-#include <thrust/scan.h>
-#include <thrust/system/cuda/detail/default_decomposition.h>
-#include <thrust/system/cuda/detail/reduce_intervals.h>
-#include <thrust/system/cuda/detail/block/inclusive_scan.h>
-#include <thrust/system/cuda/detail/detail/launch_closure.h>
-#include <thrust/system/cuda/detail/execution_policy.h>
-#include <thrust/functional.h>
-
-
-__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_BEGIN
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-
-template <typename InputIterator1,
-          typename InputIterator2,
-          typename InputIterator3,
-          typename Decomposition,
-          typename OutputIterator,
-          typename Context>
-struct copy_if_intervals_closure
-{
-  InputIterator1 input;
-  InputIterator2 stencil;
-  InputIterator3 offsets;
-  Decomposition decomp;
-  OutputIterator output;
-
-  typedef Context context_type;
-  context_type context;
-  
-  copy_if_intervals_closure(InputIterator1 input,
-                            InputIterator2 stencil,
-                            InputIterator3 offsets,
-                            Decomposition decomp,
-                            OutputIterator output,
-                            Context context = Context())
-    : input(input), stencil(stencil), offsets(offsets), decomp(decomp), output(output), context(context) {}
-
-  __device__ __thrust_forceinline__
-  void operator()(void)
-  {
-    typedef typename thrust::iterator_value<OutputIterator>::type OutputType;
-   
-    typedef unsigned int PredicateType;
-    
-    const unsigned int CTA_SIZE = context_type::ThreadsPerBlock::value;
-
-    thrust::plus<PredicateType> binary_op;
-
-    __shared__ PredicateType sdata[CTA_SIZE];  context.barrier();
-    
-    typedef typename Decomposition::index_type IndexType;
-
-    // this block processes results in [range.begin(), range.end())
-    thrust::system::detail::internal::index_range<IndexType> range = decomp[context.block_index()];
-
-    IndexType base = range.begin();
-
-    PredicateType predicate = 0;
-    
-    // advance input iterators to this thread's starting position
-    input   += base + context.thread_index();
-    stencil += base + context.thread_index();
-
-    // advance output to this interval's starting position
-    if (context.block_index() != 0)
-    {
-        InputIterator3 temp = offsets + (context.block_index() - 1);
-        output += *temp;
-    }
-
-    // process full blocks
-    while(base + CTA_SIZE <= range.end())
-    {
-        // read data
-        sdata[context.thread_index()] = predicate = *stencil;
-      
-        context.barrier();
-
-        // scan block
-        block::inclusive_scan(context, sdata, binary_op);
-       
-        // write data
-        if (predicate)
-        {
-            OutputIterator temp2 = output + (sdata[context.thread_index()] - 1);
-            *temp2 = *input;
-        }
-
-        // advance inputs by CTA_SIZE
-        base    += CTA_SIZE;
-        input   += CTA_SIZE;
-        stencil += CTA_SIZE;
-
-        // advance output by number of true predicates
-        output += sdata[CTA_SIZE - 1];
-
-        context.barrier();
-    }
-
-    // process partially full block at end of input (if necessary)
-    if (base < range.end())
-    {
-        // read data
-        if (base + context.thread_index() < range.end())
-            sdata[context.thread_index()] = predicate = *stencil;
-        else
-            sdata[context.thread_index()] = predicate = 0;
-       
-        context.barrier();
-
-        // scan block
-        block::inclusive_scan(context, sdata, binary_op);
-       
-        // write data
-        if (predicate) // expects predicate=false for >= interval_end
-        {
-            OutputIterator temp2 = output + (sdata[context.thread_index()] - 1);
-            *temp2 = *input;
-        }
-    }
-  }
-}; // copy_if_intervals_closure
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename Predicate>
-   OutputIterator copy_if(execution_policy<DerivedPolicy> &exec,
-                          InputIterator1 first,
-                          InputIterator1 last,
-                          InputIterator2 stencil,
-                          OutputIterator output,
-                          Predicate pred)
-{
-  typedef typename thrust::iterator_difference<InputIterator1>::type IndexType;
-  typedef typename thrust::iterator_value<OutputIterator>::type      OutputType;
-
-  if (first == last)
-      return output;
-
-  typedef thrust::system::detail::internal::uniform_decomposition<IndexType> Decomposition;
-  typedef thrust::detail::temporary_array<IndexType, DerivedPolicy>          IndexArray;
-
-  Decomposition decomp = default_decomposition(last - first);
-
-  // storage for per-block predicate counts
-  IndexArray block_results(exec, decomp.size());
-
-  // convert stencil into an iterator that produces integral values in {0,1}
-  typedef typename thrust::detail::predicate_to_integral<Predicate,IndexType>              PredicateToIndexTransform;
-  typedef thrust::transform_iterator<PredicateToIndexTransform, InputIterator2, IndexType> PredicateToIndexIterator;
-
-  PredicateToIndexIterator predicate_stencil(stencil, PredicateToIndexTransform(pred));
-
-  // compute number of true values in each interval
-  thrust::system::cuda::detail::reduce_intervals(exec, predicate_stencil, block_results.begin(), thrust::plus<IndexType>(), decomp);
-
-  // scan the partial sums
-  thrust::inclusive_scan(exec, block_results.begin(), block_results.end(), block_results.begin(), thrust::plus<IndexType>());
-
-  // copy values to output
-  const unsigned int ThreadsPerBlock = 256;
-  typedef typename IndexArray::iterator InputIterator3;
-  typedef detail::statically_blocked_thread_array<ThreadsPerBlock> Context;
-  typedef copy_if_intervals_closure<InputIterator1,PredicateToIndexIterator,InputIterator3,Decomposition,OutputIterator,Context> Closure;
-  Closure closure(first, predicate_stencil, block_results.begin(), decomp, output);
-  detail::launch_closure(closure, decomp.size(), ThreadsPerBlock);
-
-  return output + block_results[decomp.size() - 1];
-} // end copy_if()
-
-
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
-__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_END
-
diff --git a/compat/thrust/system/cuda/detail/count.h b/compat/thrust/system/cuda/detail/count.h
deleted file mode 100644
index a307fc5fc8..0000000000
--- a/compat/thrust/system/cuda/detail/count.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system has no special version of this algorithm 
-
diff --git a/compat/thrust/system/cuda/detail/cuda_launch_config.h b/compat/thrust/system/cuda/detail/cuda_launch_config.h
deleted file mode 100644
index b7f0ca2409..0000000000
--- a/compat/thrust/system/cuda/detail/cuda_launch_config.h
+++ /dev/null
@@ -1,384 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <cstddef>
-#include <thrust/detail/config.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-
-
-// XXX define our own device_properties_t to avoid errors when #including
-//     this file in the absence of a CUDA installation
-struct device_properties_t
-{
-  // mirror the type and spelling of cudaDeviceProp's members
-  // keep these alphabetized
-  int    major;
-  int    maxGridSize[3];
-  int    maxThreadsPerBlock;
-  int    maxThreadsPerMultiProcessor;
-  int    minor;
-  int    multiProcessorCount;
-  int    regsPerBlock;
-  size_t sharedMemPerBlock;
-  int    warpSize;
-};
-
-
-// XXX define our own device_properties_t to avoid errors when #including
-//     this file in the absence of a CUDA installation
-struct function_attributes_t
-{
-  // mirror the type and spelling of cudaFuncAttributes' members
-  // keep these alphabetized
-  size_t constSizeBytes;
-  size_t localSizeBytes;
-  int    maxThreadsPerBlock;
-  int    numRegs;
-  size_t sharedSizeBytes;
-};
-
-
-/*! Computes a block size in number of threads for a CUDA kernel using a occupancy-promoting heuristic.
- *  \param attributes The cudaFuncAttributes corresponding to a __global__ function of interest on a GPU of interest.
- *  \param properties The cudaDeviceProp corresponding to a GPU on which to launch the __global__ function of interest.
- *  \return A CUDA block size, in number of threads, which the resources of the GPU's streaming multiprocessor can
- *          accomodate and which is intended to promote occupancy. The result is equivalent to the one performed by
- *          the "CUDA Occupancy Calculator". 
- *  \note The __global__ function of interest is presumed to use 0 bytes of dynamically-allocated __shared__ memory.
- */
-inline __host__ __device__
-std::size_t block_size_with_maximum_potential_occupancy(const function_attributes_t &attributes,
-                                                        const device_properties_t   &properties);
-
-/*! Computes a block size in number of threads for a CUDA kernel using a occupancy-promoting heuristic.
- *  Use this version of the function when a CUDA block's dynamically-allocated __shared__ memory requirements
- *  vary with the size of the block.
- *  \param attributes The cudaFuncAttributes corresponding to a __global__ function of interest on a GPU of interest.
- *  \param properties The cudaDeviceProp corresponding to a GPU on which to launch the __global__ function of interest.
- *  \param block_size_to_dynamic_smem_bytes A unary function which maps an integer CUDA block size to the number of bytes
- *         of dynamically-allocated __shared__ memory required by a CUDA block of that size.
- *  \return A CUDA block size, in number of threads, which the resources of the GPU's streaming multiprocessor can
- *          accomodate and which is intended to promote occupancy. The result is equivalent to the one performed by
- *          the "CUDA Occupancy Calculator". 
- */
-template<typename UnaryFunction>
-inline __host__ __device__
-std::size_t block_size_with_maximum_potential_occupancy(const function_attributes_t &attributes,
-                                                        const device_properties_t   &properties,
-                                                        UnaryFunction block_size_to_dynamic_smem_size);
-
-
-/*! Returns the maximum amount of dynamic shared memory each block
- *  can utilize without reducing thread occupancy.
- *
- *  \param properties CUDA device properties
- *  \param attributes CUDA function attributes
- *  \param blocks_per_processor Number of blocks per streaming multiprocessor
- */
-inline __host__ __device__
-size_t proportional_smem_allocation(const device_properties_t   &properties,
-                                    const function_attributes_t &attributes,
-                                    size_t blocks_per_processor);
-
-
-template<typename UnaryFunction>
-inline __host__ __device__
-size_t max_blocksize_subject_to_smem_usage(const device_properties_t   &properties,
-                                           const function_attributes_t &attributes,
-                                           UnaryFunction blocksize_to_dynamic_smem_usage);
-
-
-
-namespace cuda_launch_config_detail
-{
-
-using std::size_t;
-
-namespace util
-{
-
-
-template<typename T>
-inline __host__ __device__
-T min_(const T &lhs, const T &rhs)
-{
-  return rhs < lhs ? rhs : lhs;
-}
-
-
-template <typename T>
-struct zero_function
-{
-  inline __host__ __device__
-  T operator()(T)
-  {
-    return 0;
-  }
-};
-
-
-// x/y rounding towards +infinity for integers, used to determine # of blocks/warps etc.
-template<typename L, typename R>
-  inline __host__ __device__ L divide_ri(const L x, const R y)
-{
-    return (x + (y - 1)) / y;
-}
-
-// x/y rounding towards zero for integers, used to determine # of blocks/warps etc.
-template<typename L, typename R>
-  inline __host__ __device__ L divide_rz(const L x, const R y)
-{
-    return x / y;
-}
-
-// round x towards infinity to the next multiple of y
-template<typename L, typename R>
-  inline __host__ __device__ L round_i(const L x, const R y){ return y * divide_ri(x, y); }
-
-// round x towards zero to the next multiple of y
-template<typename L, typename R>
-  inline __host__ __device__ L round_z(const L x, const R y){ return y * divide_rz(x, y); }
-
-} // end namespace util
-
-
-
-// granularity of shared memory allocation
-inline __host__ __device__
-size_t smem_allocation_unit(const device_properties_t &properties)
-{
-  switch(properties.major)
-  {
-    case 1:  return 512;
-    case 2:  return 128;
-    case 3:  return 256;
-    default: return 256; // unknown GPU; have to guess
-  }
-}
-
-
-// granularity of register allocation
-inline __host__ __device__
-size_t reg_allocation_unit(const device_properties_t &properties, const size_t regsPerThread)
-{
-  switch(properties.major)
-  {
-    case 1:  return (properties.minor <= 1) ? 256 : 512;
-    case 2:  switch(regsPerThread)
-             {
-               case 21:
-               case 22:
-               case 29:
-               case 30:
-               case 37:
-               case 38:
-               case 45:
-               case 46:
-                 return 128;
-               default:
-                 return 64;
-             }
-    case 3:  return 256;
-    default: return 256; // unknown GPU; have to guess
-  }
-}
-
-
-// granularity of warp allocation
-inline __host__ __device__
-size_t warp_allocation_multiple(const device_properties_t &properties)
-{
-  return (properties.major <= 1) ? 2 : 1;
-}
-
-// number of "sides" into which the multiprocessor is partitioned
-inline __host__ __device__
-size_t num_sides_per_multiprocessor(const device_properties_t &properties)
-{
-  switch(properties.major)
-  {
-    case 1:  return 1;
-    case 2:  return 2;
-    case 3:  return 4;
-    default: return 4; // unknown GPU; have to guess
-  }
-}
-
-
-inline __host__ __device__
-size_t max_blocks_per_multiprocessor(const device_properties_t &properties)
-{
-  return (properties.major <= 2) ? 8 : 16;
-}
-
-
-inline __host__ __device__
-size_t max_active_blocks_per_multiprocessor(const device_properties_t    &properties,
-                                            const function_attributes_t  &attributes,
-                                            int CTA_SIZE,
-                                            size_t dynamic_smem_bytes)
-{
-  // Determine the maximum number of CTAs that can be run simultaneously per SM
-  // This is equivalent to the calculation done in the CUDA Occupancy Calculator spreadsheet
-
-  //////////////////////////////////////////
-  // Limits due to threads/SM or blocks/SM
-  //////////////////////////////////////////
-  const size_t maxThreadsPerSM = properties.maxThreadsPerMultiProcessor;  // 768, 1024, 1536, etc.
-  const size_t maxBlocksPerSM  = max_blocks_per_multiprocessor(properties);
-
-  // Calc limits
-  const size_t ctaLimitThreads = (CTA_SIZE <= properties.maxThreadsPerBlock) ? maxThreadsPerSM / CTA_SIZE : 0;
-  const size_t ctaLimitBlocks  = maxBlocksPerSM;
-
-  //////////////////////////////////////////
-  // Limits due to shared memory/SM
-  //////////////////////////////////////////
-  const size_t smemAllocationUnit     = smem_allocation_unit(properties);
-  const size_t smemBytes  = attributes.sharedSizeBytes + dynamic_smem_bytes;
-  const size_t smemPerCTA = util::round_i(smemBytes, smemAllocationUnit);
-
-  // Calc limit
-  const size_t ctaLimitSMem = smemPerCTA > 0 ? properties.sharedMemPerBlock / smemPerCTA : maxBlocksPerSM;
-
-  //////////////////////////////////////////
-  // Limits due to registers/SM
-  //////////////////////////////////////////
-  const size_t regAllocationUnit      = reg_allocation_unit(properties, attributes.numRegs);
-  const size_t warpAllocationMultiple = warp_allocation_multiple(properties);
-  const size_t numWarps = util::round_i(util::divide_ri(CTA_SIZE, properties.warpSize), warpAllocationMultiple);
-
-  // Calc limit
-  size_t ctaLimitRegs;
-  if(properties.major <= 1)
-  {
-    // GPUs of compute capability 1.x allocate registers to CTAs
-    // Number of regs per block is regs per thread times number of warps times warp size, rounded up to allocation unit
-    const size_t regsPerCTA = util::round_i(attributes.numRegs * properties.warpSize * numWarps, regAllocationUnit);
-    ctaLimitRegs = regsPerCTA > 0 ? properties.regsPerBlock / regsPerCTA : maxBlocksPerSM;
-  }
-  else
-  {
-    // GPUs of compute capability 2.x and higher allocate registers to warps
-    // Number of regs per warp is regs per thread times times warp size, rounded up to allocation unit
-    const size_t regsPerWarp = util::round_i(attributes.numRegs * properties.warpSize, regAllocationUnit);
-    const size_t numSides = num_sides_per_multiprocessor(properties);
-    const size_t numRegsPerSide = properties.regsPerBlock / numSides;
-    ctaLimitRegs = regsPerWarp > 0 ? ((numRegsPerSide / regsPerWarp) * numSides) / numWarps : maxBlocksPerSM;
-  }
-
-  //////////////////////////////////////////
-  // Overall limit is min() of limits due to above reasons
-  //////////////////////////////////////////
-  return util::min_(ctaLimitRegs, util::min_(ctaLimitSMem, util::min_(ctaLimitThreads, ctaLimitBlocks)));
-}
-
-
-} // end namespace cuda_launch_config_detail
-
-
-template<typename UnaryFunction>
-inline __host__ __device__
-std::size_t block_size_with_maximum_potential_occupancy(const function_attributes_t &attributes,
-                                                        const device_properties_t   &properties,
-                                                        UnaryFunction block_size_to_dynamic_smem_size)
-{
-  size_t max_occupancy      = properties.maxThreadsPerMultiProcessor;
-  size_t largest_blocksize  = cuda_launch_config_detail::util::min_(properties.maxThreadsPerBlock, attributes.maxThreadsPerBlock);
-  size_t granularity        = properties.warpSize;
-  size_t max_blocksize      = 0;
-  size_t highest_occupancy  = 0;
-
-  for(size_t blocksize = largest_blocksize; blocksize != 0; blocksize -= granularity)
-  {
-    size_t occupancy = blocksize * cuda_launch_config_detail::max_active_blocks_per_multiprocessor(properties, attributes, blocksize, block_size_to_dynamic_smem_size(blocksize));
-
-    if(occupancy > highest_occupancy)
-    {
-      max_blocksize = blocksize;
-      highest_occupancy = occupancy;
-    }
-
-    // early out, can't do better
-    if(highest_occupancy == max_occupancy)
-      break;
-  }
-
-  return max_blocksize;
-}
-
-
-inline __host__ __device__
-std::size_t block_size_with_maximum_potential_occupancy(const function_attributes_t &attributes,
-                                                        const device_properties_t   &properties)
-{
-  return block_size_with_maximum_potential_occupancy(attributes, properties, cuda_launch_config_detail::util::zero_function<std::size_t>());
-}
-
-
-inline __host__ __device__
-size_t proportional_smem_allocation(const device_properties_t   &properties,
-                                    const function_attributes_t &attributes,
-                                    size_t blocks_per_processor)
-{
-  size_t smem_per_processor    = properties.sharedMemPerBlock;
-  size_t smem_allocation_unit  = cuda_launch_config_detail::smem_allocation_unit(properties);
-
-  size_t total_smem_per_block  = cuda_launch_config_detail::util::round_z(smem_per_processor / blocks_per_processor, smem_allocation_unit);
-  size_t static_smem_per_block = attributes.sharedSizeBytes;
-  
-  return total_smem_per_block - static_smem_per_block;
-}
-
-
-template<typename UnaryFunction>
-inline __host__ __device__
-size_t max_blocksize_subject_to_smem_usage(const device_properties_t   &properties,
-                                           const function_attributes_t &attributes,
-                                           UnaryFunction blocksize_to_dynamic_smem_usage)
-{
-  size_t largest_blocksize = (thrust::min)(properties.maxThreadsPerBlock, attributes.maxThreadsPerBlock);
-  size_t granularity = properties.warpSize;
-  
-  for(int blocksize = largest_blocksize; blocksize > 0; blocksize -= granularity)
-  {
-    size_t total_smem_usage = blocksize_to_dynamic_smem_usage(blocksize) + attributes.sharedSizeBytes;
-
-    if(total_smem_usage <= properties.sharedMemPerBlock)
-    {
-      return blocksize;
-    }
-  }
-
-  return 0;
-}
-
-
-} // end detail
-} // end cuda
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/cuda/detail/default_decomposition.h b/compat/thrust/system/cuda/detail/default_decomposition.h
deleted file mode 100644
index 1ed6bcfe20..0000000000
--- a/compat/thrust/system/cuda/detail/default_decomposition.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file default_decomposition.h
- *  \brief Return a decomposition that is appropriate for the CUDA backend.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/internal/decompose.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-
-template <typename IndexType>
-thrust::system::detail::internal::uniform_decomposition<IndexType> default_decomposition(IndexType n);
-
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/cuda/detail/default_decomposition.inl>
-
diff --git a/compat/thrust/system/cuda/detail/default_decomposition.inl b/compat/thrust/system/cuda/detail/default_decomposition.inl
deleted file mode 100644
index 3f0879ac93..0000000000
--- a/compat/thrust/system/cuda/detail/default_decomposition.inl
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cuda/detail/runtime_introspection.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-
-template <typename IndexType>
-thrust::system::detail::internal::uniform_decomposition<IndexType> default_decomposition(IndexType n)
-{
-  // TODO eliminate magical constant
-  device_properties_t properties = device_properties();
-  return thrust::system::detail::internal::uniform_decomposition<IndexType>(n, properties.maxThreadsPerBlock, 10 * properties.multiProcessorCount);
-}
-
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cuda/detail/detail/alignment.h b/compat/thrust/system/cuda/detail/detail/alignment.h
deleted file mode 100644
index 31fdaaf422..0000000000
--- a/compat/thrust/system/cuda/detail/detail/alignment.h
+++ /dev/null
@@ -1,223 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-namespace detail
-{
-namespace alignment_of_detail
-{
-
-
-template<typename T> class alignment_of_impl;
-
-template<typename T, std::size_t size_diff>
-  struct helper
-{
-  static const std::size_t value = size_diff;
-};
-
-template<typename T>
-  class helper<T,0>
-{
-  public:
-    static const std::size_t value = alignment_of_impl<T>::value;
-};
-
-template<typename T>
-  class alignment_of_impl
-{
-  private:
-    struct big { T x; char c; };
-
-  public:
-    static const std::size_t value = helper<big, sizeof(big) - sizeof(T)>::value;
-};
-
-
-} // end alignment_of_detail
-
-
-template<typename T>
-  struct alignment_of
-    : alignment_of_detail::alignment_of_impl<T>
-{};
-
-
-template<std::size_t Align> struct aligned_type;
-
-// __align__ is CUDA-specific, so guard it
-#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC
-
-// implementing aligned_type portably is tricky:
-
-#  if THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC
-// implement aligned_type with specialization because MSVC
-// requires literals as arguments to declspec(align(n))
-template<> struct aligned_type<1>
-{
-  struct __align__(1) type { };
-};
-
-template<> struct aligned_type<2>
-{
-  struct __align__(2) type { };
-};
-
-template<> struct aligned_type<4>
-{
-  struct __align__(4) type { };
-};
-
-template<> struct aligned_type<8>
-{
-  struct __align__(8) type { };
-};
-
-template<> struct aligned_type<16>
-{
-  struct __align__(16) type { };
-};
-
-template<> struct aligned_type<32>
-{
-  struct __align__(32) type { };
-};
-
-template<> struct aligned_type<64>
-{
-  struct __align__(64) type { };
-};
-
-template<> struct aligned_type<128>
-{
-  struct __align__(128) type { };
-};
-
-template<> struct aligned_type<256>
-{
-  struct __align__(256) type { };
-};
-
-template<> struct aligned_type<512>
-{
-  struct __align__(512) type { };
-};
-
-template<> struct aligned_type<1024>
-{
-  struct __align__(1024) type { };
-};
-
-template<> struct aligned_type<2048>
-{
-  struct __align__(2048) type { };
-};
-
-template<> struct aligned_type<4096>
-{
-  struct __align__(4096) type { };
-};
-
-template<> struct aligned_type<8192>
-{
-  struct __align__(8192) type { };
-};
-#  elif (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_GCC) && (THRUST_GCC_VERSION < 40300)
-// implement aligned_type with specialization because gcc 4.2
-// requires literals as arguments to __attribute__(aligned(n))
-template<> struct aligned_type<1>
-{
-  struct __align__(1) type { };
-};
-
-template<> struct aligned_type<2>
-{
-  struct __align__(2) type { };
-};
-
-template<> struct aligned_type<4>
-{
-  struct __align__(4) type { };
-};
-
-template<> struct aligned_type<8>
-{
-  struct __align__(8) type { };
-};
-
-template<> struct aligned_type<16>
-{
-  struct __align__(16) type { };
-};
-
-template<> struct aligned_type<32>
-{
-  struct __align__(32) type { };
-};
-
-template<> struct aligned_type<64>
-{
-  struct __align__(64) type { };
-};
-
-template<> struct aligned_type<128>
-{
-  struct __align__(128) type { };
-};
-
-#  else
-// assume the compiler allows template parameters as
-// arguments to __align__ 
-template<std::size_t Align> struct aligned_type
-{
-  struct __align__(Align) type { };
-};
-#  endif // THRUST_HOST_COMPILER
-#else
-template<std::size_t Align> struct aligned_type
-{
-  struct type { };
-};
-#endif // THRUST_DEVICE_COMPILER
-
-
-template<std::size_t Len, std::size_t Align>
-  struct aligned_storage
-{
-  union type
-  {
-    unsigned char data[Len];
-
-    typename aligned_type<Align>::type align;
-  };
-};
-
-
-} // end detail
-} // end detail
-} // end cuda
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/cuda/detail/detail/b40c/kernel_utils.h b/compat/thrust/system/cuda/detail/detail/b40c/kernel_utils.h
deleted file mode 100644
index e2c5a44941..0000000000
--- a/compat/thrust/system/cuda/detail/detail/b40c/kernel_utils.h
+++ /dev/null
@@ -1,284 +0,0 @@
-/**
- * Copyright 2010 Duane Merrill
- * 
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * 
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License. 
- * 
- * For more information, see our Google Code project site: 
- * http://code.google.com/p/back40computing/
- * 
- * Thanks!
- */
-
-
-//------------------------------------------------------------------------------
-// Common B40C Defines, Properties, and Routines 
-//------------------------------------------------------------------------------
-
-
-#pragma once
-
-#include <cuda.h>
-#include <thrust/system/cuda/detail/synchronize.h>
-
-namespace thrust  {
-namespace system  {
-namespace cuda    {
-namespace detail  {
-namespace detail  {
-namespace b40c_thrust   {
-
-//------------------------------------------------------------------------------
-// Device properties 
-//------------------------------------------------------------------------------
-
-
-#ifndef __CUDA_ARCH__
-	#define __CUDA_ARCH__ 0
-#endif
-
-#define B40C_FERMI(version)								(version >= 200)
-#define B40C_LOG_WARP_THREADS							5									// 32 threads in a warp
-#define B40C_WARP_THREADS								(1 << B40C_LOG_WARP_THREADS)
-#define B40C_LOG_MEM_BANKS(version) 					((version >= 200) ? 5 : 4)			// 32 banks on fermi, 16 on tesla
-#define B40C_MEM_BANKS(version)							(1 << B40C_LOG_MEM_BANKS(version))
-
-// TODO refactor these
-#if __CUDA_ARCH__ >= 200
-	#define FastMul(a, b) (a * b)
-#else
-	#define FastMul(a, b) (__umul24(a, b))
-#endif	
-
-#if __CUDA_ARCH__ >= 120
-	#define WarpVoteAll(active_threads, predicate) (__all(predicate))
-#else 
-	#define WarpVoteAll(active_threads, predicate) (EmulatedWarpVoteAll<active_threads>(predicate))
-#endif
-
-#if __CUDA_ARCH__ >= 200
-	#define TallyWarpVote(active_threads, predicate, storage) (__popc(__ballot(predicate)))
-#else 
-	#define TallyWarpVote(active_threads, predicate, storage) (TallyWarpVoteSm10<active_threads>(predicate, storage))
-#endif
-
-#ifdef __LP64__
-	#define _B40C_LP64_ true
-#else
-	#define _B40C_LP64_ false
-#endif
-
-#define _B40C_REG_MISER_QUALIFIER_ __shared__
-
-
-//------------------------------------------------------------------------------
-// Handy routines 
-//------------------------------------------------------------------------------
-
-
-/**
- * Select maximum
- */
-#define B40C_MAX(a, b) ((a > b) ? a : b)
-
-
-/**
- * MagnitudeShift().  Allows you to shift left for positive magnitude values, 
- * right for negative.   
- * 
- * N.B. This code is a little strange; we are using this meta-programming 
- * pattern of partial template specialization for structures in order to 
- * decide whether to shift left or right.  Normally we would just use a 
- * conditional to decide if something was negative or not and then shift 
- * accordingly, knowing that the compiler will elide the untaken branch, 
- * i.e., the out-of-bounds shift during dead code elimination. However, 
- * the pass for bounds-checking shifts seems to happen before the DCE 
- * phase, which results in a an unsightly number of compiler warnings, so 
- * we force the issue earlier using structural template specialization.
- */
-
-template <typename K, int magnitude, bool shift_left> struct MagnitudeShiftOp;
-
-template <typename K, int magnitude> 
-struct MagnitudeShiftOp<K, magnitude, true> {
-	__device__ __forceinline__ static K Shift(K key) {
-		return key << magnitude;
-	}
-};
-
-template <typename K, int magnitude> 
-struct MagnitudeShiftOp<K, magnitude, false> {
-	__device__ __forceinline__ static K Shift(K key) {
-		return key >> magnitude;
-	}
-};
-
-template <typename K, int magnitude> 
-__device__ __forceinline__ K MagnitudeShift(K key) {
-	return MagnitudeShiftOp<K, (magnitude > 0) ? magnitude : magnitude * -1, (magnitude > 0)>::Shift(key);
-}
-
-
-/**
- * Supress warnings for unused constants
- */
-template <typename T>
-__device__ __forceinline__ void SuppressUnusedConstantWarning(const T) {}
-
-
-
-
-//------------------------------------------------------------------------------
-// Common device routines
-//------------------------------------------------------------------------------
-
-
-/**
- * Perform a warp-synchrounous prefix scan.  Allows for diverting a warp's
- * threads into separate scan problems (multi-scan). 
- */
-template <int NUM_ELEMENTS, bool MULTI_SCAN>
-__device__ __forceinline__ int WarpScan(
-	volatile int warpscan[][NUM_ELEMENTS],
-	int partial_reduction,
-	int copy_section) {
-	
-	int warpscan_idx;
-	if (MULTI_SCAN) {
-		warpscan_idx = threadIdx.x & (NUM_ELEMENTS - 1);
-	} else {
-		warpscan_idx = threadIdx.x;
-	}
-
-	warpscan[1][warpscan_idx] = partial_reduction;
-
-	if (NUM_ELEMENTS > 1) warpscan[1][warpscan_idx] = partial_reduction = 
-			partial_reduction + warpscan[1][warpscan_idx - 1];
-	if (NUM_ELEMENTS > 2) warpscan[1][warpscan_idx] = partial_reduction = 
-			partial_reduction + warpscan[1][warpscan_idx - 2];
-	if (NUM_ELEMENTS > 4) warpscan[1][warpscan_idx] = partial_reduction = 
-			partial_reduction + warpscan[1][warpscan_idx - 4];
-	if (NUM_ELEMENTS > 8) warpscan[1][warpscan_idx] = partial_reduction = 
-			partial_reduction + warpscan[1][warpscan_idx - 8];
-	if (NUM_ELEMENTS > 16) warpscan[1][warpscan_idx] = partial_reduction = 
-			partial_reduction + warpscan[1][warpscan_idx - 16];
-	
-	if (copy_section > 0) {
-		warpscan[1 + copy_section][warpscan_idx] = partial_reduction;
-	}
-	
-	return warpscan[1][warpscan_idx - 1];
-}
-
-/**
- * Perform a warp-synchronous reduction
- */
-template <int NUM_ELEMENTS>
-__device__ __forceinline__ void WarpReduce(
-	int idx,
-	volatile int *storage,
-	int partial_reduction)
-{
-	storage[idx] = partial_reduction;
-
-	if (NUM_ELEMENTS > 16) storage[idx] = partial_reduction = partial_reduction + storage[idx + 16];
-	if (NUM_ELEMENTS > 8) storage[idx] = partial_reduction = partial_reduction + storage[idx + 8];
-	if (NUM_ELEMENTS > 4) storage[idx] = partial_reduction = partial_reduction + storage[idx + 4];
-	if (NUM_ELEMENTS > 2) storage[idx] = partial_reduction = partial_reduction + storage[idx + 2];
-	if (NUM_ELEMENTS > 1) storage[idx] = partial_reduction = partial_reduction + storage[idx + 1];
-}
-
-
-/**
- * Tally a warp-vote regarding the given predicate using the supplied storage
- */
-template <int ACTIVE_THREADS>
-__device__ __forceinline__ int TallyWarpVoteSm10(int predicate, int storage[]) {
-	WarpReduce<ACTIVE_THREADS>(threadIdx.x, storage, predicate);
-	return storage[0];
-}
-
-
-/**
- * Tally a warp-vote regarding the given predicate
- */
-template <int ACTIVE_THREADS>
-__device__ __forceinline__ int TallyWarpVoteSm10(int predicate) {
-  __shared__ int vote_reduction[B40C_WARP_THREADS];
-  return TallyWarpVoteSm10<ACTIVE_THREADS>(predicate, vote_reduction);
-}
-
-/**
- * Emulate the __all() warp vote instruction
- */
-template <int ACTIVE_THREADS>
-__device__ __forceinline__ int EmulatedWarpVoteAll(int predicate) {
-	return (TallyWarpVoteSm10<ACTIVE_THREADS>(predicate) == ACTIVE_THREADS);
-}
-
-
-/**
- * Have each thread concurrently perform a serial reduction over its specified segment 
- */
-template <int LENGTH>
-__device__ __forceinline__ int
-SerialReduce(int segment[]) {
-	
-	int reduce = segment[0];
-
-	#pragma unroll
-	for (int i = 1; i < (int) LENGTH; i++) {
-		reduce += segment[i];
-	}
-	
-	return reduce;
-}
-
-
-/**
- * Have each thread concurrently perform a serial scan over its specified segment
- */
-template <int LENGTH>
-__device__ __forceinline__
-void SerialScan(int segment[], int seed0) {
-	
-	int seed1;
-
-	#pragma unroll	
-	for (int i = 0; i < (int) LENGTH; i += 2) {
-		seed1 = segment[i] + seed0;
-		segment[i] = seed0;
-		seed0 = seed1 + segment[i + 1];
-		segment[i + 1] = seed1;
-	}
-}
-
-
-
-
-//------------------------------------------------------------------------------
-// Empty Kernels
-//------------------------------------------------------------------------------
-
-template <typename T>
-__global__ void FlushKernel(void)
-{
-}
-
-
-} // end namespace b40c_thrust
-} // end namespace detail
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cuda/detail/detail/b40c/radixsort_api.h b/compat/thrust/system/cuda/detail/detail/b40c/radixsort_api.h
deleted file mode 100644
index 2b199bb08a..0000000000
--- a/compat/thrust/system/cuda/detail/detail/b40c/radixsort_api.h
+++ /dev/null
@@ -1,807 +0,0 @@
-/******************************************************************************
- * Copyright 2010 Duane Merrill
- * 
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * 
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License. 
- * 
- * 
- * 
- * 
- * AUTHORS' REQUEST: 
- * 
- * 		If you use|reference|benchmark this code, please cite our Technical 
- * 		Report (http://www.cs.virginia.edu/~dgm4d/papers/RadixSortTR.pdf):
- * 
- *		@TechReport{ Merrill:Sorting:2010,
- *        	author = "Duane Merrill and Andrew Grimshaw",
- *        	title = "Revisiting Sorting for GPGPU Stream Architectures",
- *        	year = "2010",
- *        	institution = "University of Virginia, Department of Computer Science",
- *        	address = "Charlottesville, VA, USA",
- *        	number = "CS2010-03"
- *		}
- * 
- * For more information, see our Google Code project site: 
- * http://code.google.com/p/back40computing/
- * 
- * Thanks!
- ******************************************************************************/
-
-
-
-/******************************************************************************
- * Radix Sorting API
- *
- * USAGE:
- * 
- * Using the B40C radix sorting implementation is easy.  Just #include this API 
- * file and its kernel include dependencies within your source.  Below are two
- * examples for using: 
- *
- * (1) A keys-only example for sorting floats:
- * 
- *		// Create storage-management structure
- * 		RadixSortStorage<float> device_storage(d_float_keys);			
- *
- *		// Create and enact sorter
- * 		RadixSortingEnactor sorter<float>(d_float_keys_len);
- *		sorter.EnactSort(device_storage);
- *
- *		// Re-acquire pointer to sorted keys, free unused/temp storage 
- *		d_float_keys = device_storage.d_keys;
- *		device_storage.CleanupTempStorage();
- *
- * (2) And a key-value example for sorting ints paired with doubles:
- *
- *		// Create storage-management structure
- * 		RadixSortStorage<int, double> device_storage(d_int_keys, d_double_values);			
- *
- *		// Create and enact sorter
- * 		RadixSortingEnactor sorter<int, double>(d_int_keys_len);
- *		sorter.EnactSort(device_storage);
- *
- *		// Re-acquire pointer to sorted keys and values, free unused/temp storage 
- *		d_int_keys = device_storage.d_keys;
- *		d_double_values = device_storage.d_values;
- *		device_storage.CleanupTempStorage();
- *
- *
- ******************************************************************************/
-
-#pragma once
-
-#include <stdlib.h> 
-#include <stdio.h> 
-#include <string.h> 
-#include <math.h> 
-#include <float.h>
-
-#include "radixsort_reduction_kernel.h"
-#include "radixsort_spine_kernel.h"
-#include "radixsort_scanscatter_kernel.h"
-
-#include <thrust/swap.h>
-
-namespace thrust  {
-namespace system  {
-namespace cuda    {
-namespace detail  {
-namespace detail  {
-namespace b40c_thrust   {
-
-
-/******************************************************************************
- * Debugging options
- ******************************************************************************/
-
-static bool RADIXSORT_DEBUG = false;
-
-
-
-/******************************************************************************
- * Structures for mananging device-side sorting state
- ******************************************************************************/
-
-/**
- * Sorting storage-management structure for device vectors
- */
-template <typename K, typename V = KeysOnlyType>
-struct RadixSortStorage {
-
-	// Device vector of keys to sort
-	K* d_keys;
-	
-	// Device vector of values to sort
-	V* d_values;
-
-	// Ancillary device vector for key storage 
-	K* d_alt_keys;
-
-	// Ancillary device vector for value storage
-	V* d_alt_values;
-
-	// Temporary device storage needed for radix sorting histograms
-	int *d_spine;
-	
-	// Flip-flopping temporary device storage denoting which digit place 
-	// pass should read from which input source (i.e., false if reading from 
-	// keys, true if reading from alternate_keys
-	bool *d_from_alt_storage;
-
-	// Host-side boolean whether or not an odd number of sorting passes left the 
-	// results in alternate storage.  If so, the d_keys (and d_values) pointers 
-	// will have been swapped with the d_alt_keys (and d_alt_values) pointers in order to 
-	// point to the final results.
-	bool using_alternate_storage;
-	
-	// Constructor
-	RadixSortStorage(K* keys = NULL, V* values = NULL) 
-	{ 
-		d_keys = keys; 
-		d_values = values; 
-		d_alt_keys = NULL; 
-		d_alt_values = NULL; 
-		d_spine = NULL;
-		d_from_alt_storage = NULL;
-		
-		using_alternate_storage = false;
-	}
-
-	// Clean up non-results storage (may include freeing original storage if 
-	// primary pointers were swizzled as per using_alternate_storage) 
-	cudaError_t CleanupTempStorage() 
-	{
-		if (d_alt_keys) cudaFree(d_alt_keys);
-		if (d_alt_values) cudaFree(d_alt_values);
-		if (d_spine) cudaFree(d_spine);
-		if (d_from_alt_storage) cudaFree(d_from_alt_storage);
-		
-		return cudaSuccess;
-	}
-};
-
-
-
-/******************************************************************************
- * Base class for sorting enactors
- ******************************************************************************/
-
-
-/**
- * Base class for SRTS radix sorting enactors.
- */
-template <typename K, typename V>
-class BaseRadixSortingEnactor 
-{
-public:
-	
-	// Unsigned integer type suitable for radix sorting of keys
-	typedef typename KeyConversion<K>::UnsignedBits ConvertedKeyType;
-
-protected:
-
-	//
-	// Information about our problem configuration
-	//
-	
-	bool				_keys_only;
-	unsigned int 		_num_elements;
-	int 				_cycle_elements;
-	int 				_spine_elements;
-	int 				_grid_size;
-	CtaDecomposition 	_work_decomposition;
-	int 				_passes;
-	bool 				_swizzle_pointers_for_odd_passes;
-
-	// Information about our target device
-	cudaDeviceProp 		_device_props;
-	int 				_device_sm_version;
-	
-	// Information about our kernel assembly
-	int 				_kernel_ptx_version;
-	cudaFuncAttributes 	_spine_scan_kernel_attrs;
-	
-protected:
-	
-	/**
-	 * Constructor.
-	 */
-	BaseRadixSortingEnactor(int passes, int radix_bits, unsigned int num_elements, int max_grid_size, bool swizzle_pointers_for_odd_passes = true); 
-	
-	/**
-	 * Heuristic for determining the number of CTAs to launch.
-	 *   
-	 * @param[in] 		max_grid_size  
-	 * 		Maximum allowable number of CTAs to launch.  A value of 0 indicates 
-	 * 		that the default value should be used.
-	 * 
-	 * @return The actual number of CTAs that should be launched
-	 */
-	int GridSize(int max_grid_size);
-
-	/**
-	 * Performs a distribution sorting pass over a single digit place
-	 */
-	template <int PASS, int RADIX_BITS, int BIT, typename PreprocessFunctor, typename PostprocessFunctor>
-	cudaError_t DigitPlacePass(const RadixSortStorage<ConvertedKeyType, V> &converted_storage); 
-	
-	/**
-	 * Enacts a sorting operation by performing the the appropriate 
-	 * digit-place passes.  To be overloaded by specialized subclasses.
-	 */
-	virtual cudaError_t EnactDigitPlacePasses(const RadixSortStorage<ConvertedKeyType, V> &converted_storage) = 0;
-	
-public:
-	
-	/**
-	 * Returns the length (in unsigned ints) of the device vector needed for  
-	 * temporary storage of the reduction spine.  Useful if pre-allocating 
-	 * your own device storage (as opposed to letting EnactSort() allocate it
-	 * for you).
-	 */
-	int SpineElements() { return _spine_elements; }
-
-	/**
-	 * Returns whether or not the problem will fit on the device.
-	 */
-	bool CanFit();
-
-	/**
-	 * Enacts a radix sorting operation on the specified device data.
-	 * 
-	 * IMPORTANT NOTES: The device storage backing the specified input vectors of 
-	 * keys (and data) will be modified.  (I.e., treat this as an in-place sort.)  
-	 * 
-	 * Additionally, the pointers in the problem_storage structure may be updated 
-	 * (a) depending upon the number of digit-place sorting passes needed, and (b) 
-	 * whether or not the caller has already allocated temporary storage.  
-	 * 
-	 * The sorted results will always be referenced by problem_storage.d_keys (and 
-	 * problem_storage.d_values).  However, for an odd number of sorting passes (uncommon)
-	 * these results will actually be backed by the storage initially allocated for 
-	 * by problem_storage.d_alt_keys (and problem_storage.d_alt_values).  If so, 
-	 * problem_storage.d_alt_keys and problem_storage.d_alt_keys will be updated to 
-	 * reference the original problem_storage.d_keys and problem_storage.d_values in order 
-	 * to facilitate cleanup.  
-	 * 
-	 * This means it is important to avoid keeping stale copies of device pointers 
-	 * to keys/data; you will want to re-reference the pointers in problem_storage.
-	 * 
-	 * @param[in/out] 	problem_storage 
-	 * 		Device vectors of keys and values to sort, and ancillary storage 
-	 * 		needed by the sorting kernels. See the IMPORTANT NOTES above. 
-	 * 
-	 * 		The problem_storage.[alternate_keys|alternate_values|d_spine] fields are 
-	 * 		temporary storage needed by the sorting kernels.  To facilitate 
-	 * 		speed, callers are welcome to re-use this storage for same-sized 
-	 * 		(or smaller) sortign problems. If NULL, these storage vectors will be 
-	 *      allocated by this routine (and must be subsequently cuda-freed by 
-	 *      the caller).
-	 *
-	 * @return cudaSuccess on success, error enumeration otherwise
-	 */
-	cudaError_t EnactSort(RadixSortStorage<K, V> &problem_storage);	
-
-    /*
-     * Destructor
-     */
-    virtual ~BaseRadixSortingEnactor() {}
-};
-
-
-
-template <typename K, typename V>
-BaseRadixSortingEnactor<K, V>::BaseRadixSortingEnactor(
-	int passes, 
-	int max_radix_bits, 
-	unsigned int num_elements, 
-	int max_grid_size,
-	bool swizzle_pointers_for_odd_passes) 
-{
-	//
-	// Get current device properties 
-	//
-
-	int current_device;
-	cudaGetDevice(&current_device);
-	cudaGetDeviceProperties(&_device_props, current_device);
-	_device_sm_version = _device_props.major * 100 + _device_props.minor * 10;
-
-	
-	//
-	// Get SM version of compiled kernel assembly
-	//
-	cudaFuncGetAttributes(&_spine_scan_kernel_attrs, SrtsScanSpine<void>);
-	_kernel_ptx_version = _spine_scan_kernel_attrs.ptxVersion * 10;
-	
-
-	//
-	// Determine number of CTAs to launch, shared memory, cycle elements, etc.
-	//
-
-	_passes								= passes;
-	_num_elements 						= num_elements;
-	_keys_only 							= IsKeysOnly<V>();
-	_cycle_elements 					= B40C_RADIXSORT_CYCLE_ELEMENTS(_kernel_ptx_version , ConvertedKeyType, V);
-	_grid_size 							= GridSize(max_grid_size);
-	_swizzle_pointers_for_odd_passes	= swizzle_pointers_for_odd_passes;
-	
-	int total_cycles 			= _num_elements / _cycle_elements;
-	unsigned int cycles_per_block 		= total_cycles / _grid_size;						
-	unsigned int extra_cycles 			= total_cycles - (cycles_per_block * _grid_size);
-
-	CtaDecomposition work_decomposition = {
-		extra_cycles,										// num_big_blocks
-		(cycles_per_block + 1) * _cycle_elements,			// big_block_elements
-		cycles_per_block * _cycle_elements,					// normal_block_elements
-		_num_elements - (total_cycles * _cycle_elements),	// extra_elements_last_block
-		_num_elements};										// num_elements
-	
-	_work_decomposition = work_decomposition;
-	
-	int spine_cycles = ((_grid_size * (1 << max_radix_bits)) + B40C_RADIXSORT_SPINE_CYCLE_ELEMENTS - 1) / B40C_RADIXSORT_SPINE_CYCLE_ELEMENTS;
-	_spine_elements = spine_cycles * B40C_RADIXSORT_SPINE_CYCLE_ELEMENTS;
-}
-
-
-
-template <typename K, typename V>
-int BaseRadixSortingEnactor<K, V>::GridSize(int max_grid_size)
-{
-	const int SINGLE_CTA_CUTOFF = 0;		// right now zero; we have no single-cta sorting
-
-	// find maximum number of threadblocks if "use-default"
-	if (max_grid_size == 0) {
-
-		if (_num_elements <= static_cast<unsigned int>(SINGLE_CTA_CUTOFF)) {
-
-			// The problem size is too small to warrant a two-level reduction: 
-			// use only one stream-processor
-			max_grid_size = 1;
-
-		} else {
-
-			if (_device_sm_version <= 120) {
-				
-				// G80/G90
-				max_grid_size = _device_props.multiProcessorCount * 4;
-				
-			} else if (_device_sm_version < 200) {
-				
-				// GT200 (has some kind of TLB or icache drama)
-				int orig_max_grid_size = _device_props.multiProcessorCount * B40C_RADIXSORT_SCAN_SCATTER_CTA_OCCUPANCY(_kernel_ptx_version);
-				if (_keys_only) { 
-					orig_max_grid_size *= (_num_elements + (1024 * 1024 * 96) - 1) / (1024 * 1024 * 96);
-				} else {
-					orig_max_grid_size *= (_num_elements + (1024 * 1024 * 64) - 1) / (1024 * 1024 * 64);
-				}
-				max_grid_size = orig_max_grid_size;
-
-				if (_num_elements / _cycle_elements > static_cast<unsigned int>(max_grid_size)) {
-	
-					double multiplier1 = 4.0;
-					double multiplier2 = 16.0;
-
-					double delta1 = 0.068;
-					double delta2 = 0.127;	
-	
-					int dividend = (_num_elements + _cycle_elements - 1) / _cycle_elements;
-	
-					while(true) {
-	
-						double quotient = ((double) dividend) / (multiplier1 * max_grid_size);
-						quotient -= (int) quotient;
-
-						if ((quotient > delta1) && (quotient < 1 - delta1)) {
-
-							quotient = ((double) dividend) / (multiplier2 * max_grid_size / 3.0);
-							quotient -= (int) quotient;
-
-							if ((quotient > delta2) && (quotient < 1 - delta2)) {
-								break;
-							}
-						}
-						
-						if (max_grid_size == orig_max_grid_size - 2) {
-							max_grid_size = orig_max_grid_size - 30;
-						} else {
-							max_grid_size -= 1;
-						}
-					}
-				}
-			} else {
-				
-				// GF100
-				max_grid_size = 418;
-			}
-		}
-	}
-
-	// Calculate the actual number of threadblocks to launch.  Initially
-	// assume that each threadblock will do only one cycle_elements worth 
-	// of work, but then clamp it by the "max" restriction derived above
-	// in order to accomodate the "single-sp" and "saturated" cases.
-
-	int grid_size = _num_elements / _cycle_elements;
-	if (grid_size == 0) {
-		grid_size = 1;
-	}
-	if (grid_size > max_grid_size) {
-		grid_size = max_grid_size;
-	} 
-
-	return grid_size;
-}
-
-
-
-template <typename K, typename V>
-bool BaseRadixSortingEnactor<K, V>::
-CanFit() 
-{
-	long long bytes = (_num_elements * sizeof(K) * 2) + (_spine_elements * sizeof(int));
-	if (!_keys_only) bytes += _num_elements * sizeof(V) * 2;
-
-	if (_device_props.totalGlobalMem < 1024 * 1024 * 513) {
-		return (bytes < ((double) _device_props.totalGlobalMem) * 0.81); 	// allow up to 81% capacity for 512MB   
-	}
-	
-	return (bytes < ((double) _device_props.totalGlobalMem) * 0.89); 	// allow up to 90% capacity 
-}
-
-// Runs one digit-place pass as three launches: RakingReduction, SrtsScanSpine, then ScanScatterDigits.
-// No CUDA return code is checked in this function, so it always returns cudaSuccess.
-template <typename K, typename V>
-template <int PASS, int RADIX_BITS, int BIT, typename PreprocessFunctor, typename PostprocessFunctor>
-cudaError_t BaseRadixSortingEnactor<K, V>::
-DigitPlacePass(const RadixSortStorage<ConvertedKeyType, V> &converted_storage)
-{
-	int threads = B40C_RADIXSORT_THREADS;
-	int dynamic_smem;	// dynamic shared-memory bytes for the next launch; recomputed before each padded kernel
-
-	cudaFuncAttributes reduce_kernel_attrs, scan_scatter_attrs;	// only sharedSizeBytes is read from these
-	cudaFuncGetAttributes(&reduce_kernel_attrs, RakingReduction<ConvertedKeyType, V, PASS, RADIX_BITS, BIT, PreprocessFunctor>);
-	cudaFuncGetAttributes(&scan_scatter_attrs, ScanScatterDigits<ConvertedKeyType, V, PASS, RADIX_BITS, BIT, PreprocessFunctor, PostprocessFunctor>);
-	
-	//
-	// Counting Reduction
-	//
-
-	// Run tesla flush kernel if we have two or more threadblocks for each of the SMs
-	if ((_device_sm_version == 130) && (_work_decomposition.num_elements > static_cast<unsigned int>(_device_props.multiProcessorCount * _cycle_elements * 2))) { 
-		FlushKernel<void><<<_grid_size, B40C_RADIXSORT_THREADS, scan_scatter_attrs.sharedSizeBytes>>>();
-		synchronize_if_enabled("FlushKernel");
-	}
-
-	// GF100 and GT200 get the same smem allocation for every kernel launch (pad the reduction/top-level-scan kernels)
-	dynamic_smem = (_kernel_ptx_version >= 130) ? scan_scatter_attrs.sharedSizeBytes - reduce_kernel_attrs.sharedSizeBytes : 0;	// pad up to the scan/scatter kernel's static smem size
-
-	RakingReduction<ConvertedKeyType, V, PASS, RADIX_BITS, BIT, PreprocessFunctor> <<<_grid_size, threads, dynamic_smem>>>(
-		converted_storage.d_from_alt_storage,
-		converted_storage.d_spine,
-		converted_storage.d_keys,
-		converted_storage.d_alt_keys,
-		_work_decomposition);
-    synchronize_if_enabled("RakingReduction");
-
-	
-	//
-	// Spine
-	//
-	
-	// GF100 and GT200 get the same smem allocation for every kernel launch (pad the reduction/top-level-scan kernels)
-	dynamic_smem = (_kernel_ptx_version >= 130) ? scan_scatter_attrs.sharedSizeBytes - _spine_scan_kernel_attrs.sharedSizeBytes : 0;
-	
-	SrtsScanSpine<void><<<_grid_size, B40C_RADIXSORT_SPINE_THREADS, dynamic_smem>>>(
-		converted_storage.d_spine,
-		converted_storage.d_spine,	// same pointer passed for both arguments
-		_spine_elements);
-    synchronize_if_enabled("SrtsScanSpine");
-
-	
-	//
-	// Scanning Scatter
-	//
-	
-	// Run tesla flush kernel if we have two or more threadblocks for each of the SMs
-	if ((_device_sm_version == 130) && (_work_decomposition.num_elements > static_cast<unsigned int>(_device_props.multiProcessorCount * _cycle_elements * 2))) { 
-		FlushKernel<void><<<_grid_size, B40C_RADIXSORT_THREADS, scan_scatter_attrs.sharedSizeBytes>>>();
-		synchronize_if_enabled("FlushKernel");
-	}
-
-	ScanScatterDigits<ConvertedKeyType, V, PASS, RADIX_BITS, BIT, PreprocessFunctor, PostprocessFunctor> <<<_grid_size, threads, 0>>>(	// no dynamic smem padding for this kernel
-		converted_storage.d_from_alt_storage,
-		converted_storage.d_spine,
-		converted_storage.d_keys,
-		converted_storage.d_alt_keys,
-		converted_storage.d_values,
-		converted_storage.d_alt_values,
-		_work_decomposition);
-    synchronize_if_enabled("ScanScatterDigits");
-
-	return cudaSuccess;	// unconditional: no launch or API status is propagated
-}
-
-// Enacts the sort on problem_storage: allocates any temporary device buffers that are still NULL (they are not
-// freed here), runs the digit-place passes and returns their status. cudaMalloc/cudaMemcpy results are not checked.
-template <typename K, typename V>
-cudaError_t BaseRadixSortingEnactor<K, V>::
-EnactSort(RadixSortStorage<K, V> &problem_storage) 
-{
-	//
-	// Allocate device memory for temporary storage (if necessary)
-	//
-
-	if (problem_storage.d_alt_keys == NULL) {
-		cudaMalloc((void**) &problem_storage.d_alt_keys, _num_elements * sizeof(K));
-	}
-	if (!_keys_only && (problem_storage.d_alt_values == NULL)) {
-		cudaMalloc((void**) &problem_storage.d_alt_values, _num_elements * sizeof(V));
-	}
-	if (problem_storage.d_spine == NULL) {
-		cudaMalloc((void**) &problem_storage.d_spine, _spine_elements * sizeof(int));
-	}
-	if (problem_storage.d_from_alt_storage == NULL) {
-		cudaMalloc((void**) &problem_storage.d_from_alt_storage, 2 * sizeof(bool));	// two flags; one is read back below, selected by (_passes & 0x1)
-	}
-
-	// Determine suitable type of unsigned byte storage to use for keys 
-	typedef typename KeyConversion<K>::UnsignedBits ConvertedKeyType;
-	
-	// Copy storage pointers to an appropriately typed structure 
-	RadixSortStorage<ConvertedKeyType, V> converted_storage;
-	memcpy(&converted_storage, &problem_storage, sizeof(RadixSortStorage<K, V>));	// bitwise copy; assumes both instantiations share one layout — TODO confirm
-
-	// 
-	// Enact the sorting operation
-	//
-	
-	if (RADIXSORT_DEBUG) {
-		
-		printf("_device_sm_version: %d, _kernel_ptx_version: %d\n", _device_sm_version, _kernel_ptx_version);
-		printf("Bottom-level reduction & scan kernels:\n\tgrid_size: %d, \n\tthreads: %d, \n\tcycle_elements: %d, \n\tnum_big_blocks: %d, \n\tbig_block_elements: %d, \n\tnormal_block_elements: %d\n\textra_elements_last_block: %d\n\n",
-			_grid_size, B40C_RADIXSORT_THREADS, _cycle_elements, _work_decomposition.num_big_blocks, _work_decomposition.big_block_elements, _work_decomposition.normal_block_elements, _work_decomposition.extra_elements_last_block);
-		printf("Top-level spine scan:\n\tgrid_size: %d, \n\tthreads: %d, \n\tspine_block_elements: %d\n\n", 
-			_grid_size, B40C_RADIXSORT_SPINE_THREADS, _spine_elements);
-	}	
-
-	cudaError_t retval = EnactDigitPlacePasses(converted_storage);
-
-	
-	//
-	// Swizzle pointers if we left our sorted output in temp storage 
-	//
-	
-	if (_swizzle_pointers_for_odd_passes) {
-	
-		cudaMemcpy(
-			&problem_storage.using_alternate_storage, 
-			&problem_storage.d_from_alt_storage[_passes & 0x1], 	// device-side flag selected by the parity of _passes
-			sizeof(bool), 
-			cudaMemcpyDeviceToHost);
-	
-		if (problem_storage.using_alternate_storage) {
-            thrust::swap<K*>(problem_storage.d_keys, problem_storage.d_alt_keys);
-			if (!_keys_only) {
-                thrust::swap<V*>(problem_storage.d_values, problem_storage.d_alt_values);
-			}
-		}
-	}
-	
-	return retval;	// status of EnactDigitPlacePasses only
-}
-
-
-
-
-
-/******************************************************************************
- * Sorting enactor classes
- ******************************************************************************/
-
-/**
- * Generic sorting enactor class.  Simply create an instance of this class
- * with your key-type K (and optionally value-type V if sorting with satellite 
- * values).
- * 
- * Template specialization provides the appropriate enactor instance to handle 
- * the specified data types. 
- * 
- * @template-param K
- * 		Type of keys to be sorted
- *
- * @template-param V
- * 		Type of values to be sorted.
- *
- * @template-param ConvertedKeyType
- * 		Leave as default to effect necessary enactor specialization.
- */
-template <typename K, typename V = KeysOnlyType, typename ConvertedKeyType = typename KeyConversion<K>::UnsignedBits>
-class RadixSortingEnactor;
-
-
-
-/**
- * Sorting enactor that is specialized for 8-bit key types
- */
-template <typename K, typename V>
-class RadixSortingEnactor<K, V, unsigned char> : public BaseRadixSortingEnactor<K, V>
-{
-protected:
-
-	typedef BaseRadixSortingEnactor<K, V> Base; 
-	typedef typename Base::ConvertedKeyType ConvertedKeyType;
-	// Two 4-bit passes (template args are <PASS, RADIX_BITS, BIT, pre, post>): key preprocessing on the first, postprocessing on the last.
-	cudaError_t EnactDigitPlacePasses(const RadixSortStorage<ConvertedKeyType, V> &converted_storage)
-	{
-		Base::template DigitPlacePass<0, 4, 0, PreprocessKeyFunctor<K>,      NopFunctor<ConvertedKeyType> >(converted_storage);
-		Base::template DigitPlacePass<1, 4, 4, NopFunctor<ConvertedKeyType>, PostprocessKeyFunctor<K> >    (converted_storage); 
-
-		return cudaSuccess;	// per-pass return values are not inspected
-	}
-
-public:
-	
-	/**
-	 * Constructor.
-	 * 
-	 * @param[in] 		num_elements 
-	 * 		Length (in elements) of the input to a sorting operation
-	 * 
-	 * @param[in] 		max_grid_size  
-	 * 		Maximum allowable number of CTAs to launch.  The default value of 0 indicates 
-	 * 		that the dispatch logic should select an appropriate value for the target device.
-	 */	
-	RadixSortingEnactor(unsigned int num_elements, int max_grid_size = 0) : Base::BaseRadixSortingEnactor(2, 4, num_elements, max_grid_size) {}	// presumably (passes = 2, radix bits = 4) — base constructor not visible here
-
-};
-
-
-
-/**
- * Sorting enactor that is specialized for 16-bit key types
- */
-template <typename K, typename V>
-class RadixSortingEnactor<K, V, unsigned short> : public BaseRadixSortingEnactor<K, V>
-{
-protected:
-
-	typedef BaseRadixSortingEnactor<K, V> Base; 
-	typedef typename Base::ConvertedKeyType ConvertedKeyType;
-	// Four 4-bit passes (template args are <PASS, RADIX_BITS, BIT, pre, post>): key preprocessing on the first, postprocessing on the last.
-	cudaError_t EnactDigitPlacePasses(const RadixSortStorage<ConvertedKeyType, V> &converted_storage)
-	{
-		Base::template DigitPlacePass<0, 4, 0,  PreprocessKeyFunctor<K>,      NopFunctor<ConvertedKeyType> >(converted_storage);
-		Base::template DigitPlacePass<1, 4, 4,  NopFunctor<ConvertedKeyType>, NopFunctor<ConvertedKeyType> >(converted_storage); 
-		Base::template DigitPlacePass<2, 4, 8,  NopFunctor<ConvertedKeyType>, NopFunctor<ConvertedKeyType> >(converted_storage); 
-		Base::template DigitPlacePass<3, 4, 12, NopFunctor<ConvertedKeyType>, PostprocessKeyFunctor<K> >    (converted_storage); 
-
-		return cudaSuccess;	// per-pass return values are not inspected
-	}
-
-public:
-	
-	/**
-	 * Constructor.
-	 * 
-	 * @param[in] 		num_elements 
-	 * 		Length (in elements) of the input to a sorting operation
-	 * 
-	 * @param[in] 		max_grid_size  
-	 * 		Maximum allowable number of CTAs to launch.  The default value of 0 indicates 
-	 * 		that the dispatch logic should select an appropriate value for the target device.
-	 */	
-	RadixSortingEnactor(unsigned int num_elements, int max_grid_size = 0) : Base::BaseRadixSortingEnactor(4, 4, num_elements, max_grid_size) {}	// presumably (passes = 4, radix bits = 4) — base constructor not visible here
-
-};
-
-
-/**
- * Sorting enactor that is specialized for 32-bit key types
- */
-template <typename K, typename V>
-class RadixSortingEnactor<K, V, unsigned int> : public BaseRadixSortingEnactor<K, V>
-{
-protected:
-
-	typedef BaseRadixSortingEnactor<K, V> Base; 
-	typedef typename Base::ConvertedKeyType ConvertedKeyType;
-	// Eight 4-bit passes (template args are <PASS, RADIX_BITS, BIT, pre, post>): key preprocessing on the first, postprocessing on the last.
-	cudaError_t EnactDigitPlacePasses(const RadixSortStorage<ConvertedKeyType, V> &converted_storage)
-	{
-		Base::template DigitPlacePass<0, 4, 0,  PreprocessKeyFunctor<K>,      NopFunctor<ConvertedKeyType> >(converted_storage);
-		Base::template DigitPlacePass<1, 4, 4,  NopFunctor<ConvertedKeyType>, NopFunctor<ConvertedKeyType> >(converted_storage); 
-		Base::template DigitPlacePass<2, 4, 8,  NopFunctor<ConvertedKeyType>, NopFunctor<ConvertedKeyType> >(converted_storage); 
-		Base::template DigitPlacePass<3, 4, 12, NopFunctor<ConvertedKeyType>, NopFunctor<ConvertedKeyType> >(converted_storage); 
-		Base::template DigitPlacePass<4, 4, 16, NopFunctor<ConvertedKeyType>, NopFunctor<ConvertedKeyType> >(converted_storage); 
-		Base::template DigitPlacePass<5, 4, 20, NopFunctor<ConvertedKeyType>, NopFunctor<ConvertedKeyType> >(converted_storage); 
-		Base::template DigitPlacePass<6, 4, 24, NopFunctor<ConvertedKeyType>, NopFunctor<ConvertedKeyType> >(converted_storage); 
-		Base::template DigitPlacePass<7, 4, 28, NopFunctor<ConvertedKeyType>, PostprocessKeyFunctor<K> >    (converted_storage); 
-
-		return cudaSuccess;	// per-pass return values are not inspected
-	}
-
-public:
-	
-	/**
-	 * Constructor.
-	 * 
-	 * @param[in] 		num_elements 
-	 * 		Length (in elements) of the input to a sorting operation
-	 * 
-	 * @param[in] 		max_grid_size  
-	 * 		Maximum allowable number of CTAs to launch.  The default value of 0 indicates 
-	 * 		that the dispatch logic should select an appropriate value for the target device.
-	 */	
-	RadixSortingEnactor(unsigned int num_elements, int max_grid_size = 0) : Base::BaseRadixSortingEnactor(8, 4, num_elements, max_grid_size) {}	// presumably (passes = 8, radix bits = 4) — base constructor not visible here
-
-};
-
-
-
-/**
- * Sorting enactor that is specialized for 64-bit key types
- */
-template <typename K, typename V>
-class RadixSortingEnactor<K, V, unsigned long long> : public BaseRadixSortingEnactor<K, V>
-{
-protected:
-
-	typedef BaseRadixSortingEnactor<K, V> Base; 
-	typedef typename Base::ConvertedKeyType ConvertedKeyType;
-	// Sixteen 4-bit passes (template args are <PASS, RADIX_BITS, BIT, pre, post>): key preprocessing on the first, postprocessing on the last.
-	cudaError_t EnactDigitPlacePasses(const RadixSortStorage<ConvertedKeyType, V> &converted_storage)
-	{
-		Base::template DigitPlacePass<0,  4, 0,  PreprocessKeyFunctor<K>,      NopFunctor<ConvertedKeyType> >(converted_storage);
-		Base::template DigitPlacePass<1,  4, 4,  NopFunctor<ConvertedKeyType>, NopFunctor<ConvertedKeyType> >(converted_storage); 
-		Base::template DigitPlacePass<2,  4, 8,  NopFunctor<ConvertedKeyType>, NopFunctor<ConvertedKeyType> >(converted_storage); 
-		Base::template DigitPlacePass<3,  4, 12, NopFunctor<ConvertedKeyType>, NopFunctor<ConvertedKeyType> >(converted_storage); 
-		Base::template DigitPlacePass<4,  4, 16, NopFunctor<ConvertedKeyType>, NopFunctor<ConvertedKeyType> >(converted_storage); 
-		Base::template DigitPlacePass<5,  4, 20, NopFunctor<ConvertedKeyType>, NopFunctor<ConvertedKeyType> >(converted_storage); 
-		Base::template DigitPlacePass<6,  4, 24, NopFunctor<ConvertedKeyType>, NopFunctor<ConvertedKeyType> >(converted_storage); 
-		Base::template DigitPlacePass<7,  4, 28, NopFunctor<ConvertedKeyType>, NopFunctor<ConvertedKeyType> >(converted_storage); 
-		Base::template DigitPlacePass<8,  4, 32, NopFunctor<ConvertedKeyType>, NopFunctor<ConvertedKeyType> >(converted_storage);
-		Base::template DigitPlacePass<9,  4, 36, NopFunctor<ConvertedKeyType>, NopFunctor<ConvertedKeyType> >(converted_storage); 
-		Base::template DigitPlacePass<10, 4, 40, NopFunctor<ConvertedKeyType>, NopFunctor<ConvertedKeyType> >(converted_storage); 
-		Base::template DigitPlacePass<11, 4, 44, NopFunctor<ConvertedKeyType>, NopFunctor<ConvertedKeyType> >(converted_storage); 
-		Base::template DigitPlacePass<12, 4, 48, NopFunctor<ConvertedKeyType>, NopFunctor<ConvertedKeyType> >(converted_storage); 
-		Base::template DigitPlacePass<13, 4, 52, NopFunctor<ConvertedKeyType>, NopFunctor<ConvertedKeyType> >(converted_storage); 
-		Base::template DigitPlacePass<14, 4, 56, NopFunctor<ConvertedKeyType>, NopFunctor<ConvertedKeyType> >(converted_storage); 
-		Base::template DigitPlacePass<15, 4, 60, NopFunctor<ConvertedKeyType>, PostprocessKeyFunctor<K> >    (converted_storage); 
-
-		return cudaSuccess;	// per-pass return values are not inspected
-	}
-
-public:
-	
-	/**
-	 * Constructor.
-	 * 
-	 * @param[in] 		num_elements 
-	 * 		Length (in elements) of the input to a sorting operation
-	 * 
-	 * @param[in] 		max_grid_size  
-	 * 		Maximum allowable number of CTAs to launch.  The default value of 0 indicates 
-	 * 		that the dispatch logic should select an appropriate value for the target device.
-	 */	
-	RadixSortingEnactor(unsigned int num_elements, int max_grid_size = 0) : Base::BaseRadixSortingEnactor(16, 4, num_elements, max_grid_size) {}	// presumably (passes = 16, radix bits = 4) — base constructor not visible here
-
-};
-
-
-} // end namespace b40c_thrust
-} // end namespace detail
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cuda/detail/detail/b40c/radixsort_kernel_common.h b/compat/thrust/system/cuda/detail/detail/b40c/radixsort_kernel_common.h
deleted file mode 100644
index 7899dc3c0e..0000000000
--- a/compat/thrust/system/cuda/detail/detail/b40c/radixsort_kernel_common.h
+++ /dev/null
@@ -1,173 +0,0 @@
-/******************************************************************************
- * 
- * Copyright 2010 Duane Merrill
- * 
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * 
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License. 
- * 
- * 
- * 
- * 
- * AUTHORS' REQUEST: 
- * 
- * 		If you use|reference|benchmark this code, please cite our Technical 
- * 		Report (http://www.cs.virginia.edu/~dgm4d/papers/RadixSortTR.pdf):
- * 
- *		@TechReport{ Merrill:Sorting:2010,
- *        	author = "Duane Merrill and Andrew Grimshaw",
- *        	title = "Revisiting Sorting for GPGPU Stream Architectures",
- *        	year = "2010",
- *        	institution = "University of Virginia, Department of Computer Science",
- *        	address = "Charlottesville, VA, USA",
- *        	number = "CS2010-03"
- *		}
- * 
- * For more information, see our Google Code project site: 
- * http://code.google.com/p/back40computing/
- * 
- * Thanks!
- * 
- ******************************************************************************/
-
-
-/******************************************************************************
- * Configuration management for B40C radix sorting kernels  
- ******************************************************************************/
-
-#pragma once
-
-#include "kernel_utils.h"
-#include "vector_types.h"
-#include "radixsort_key_conversion.h"
-
-namespace thrust  {
-namespace system  {
-namespace cuda    {
-namespace detail  {
-namespace detail  {
-namespace b40c_thrust   {
-
-
-/******************************************************************************
- * Radix sorting configuration  
- ******************************************************************************/
-
-// 128 threads
-#define B40C_RADIXSORT_LOG_THREADS						7								
-#define B40C_RADIXSORT_THREADS							(1 << B40C_RADIXSORT_LOG_THREADS)	
-
-// Target threadblock occupancy for counting/reduction kernel
-#define B40C_SM20_REDUCE_CTA_OCCUPANCY()					(8)			// 8 threadblocks on GF100
-#define B40C_SM12_REDUCE_CTA_OCCUPANCY()					(5)			// 5 threadblocks on GT200
-#define B40C_SM10_REDUCE_CTA_OCCUPANCY()					(3)			// 3 threadblocks on G80 (NOTE(review): comment formerly said 4 — confirm intended value)
-#define B40C_RADIXSORT_REDUCE_CTA_OCCUPANCY(version)		((version >= 200) ? B40C_SM20_REDUCE_CTA_OCCUPANCY() : 	\
-			        										 (version >= 120) ? B40C_SM12_REDUCE_CTA_OCCUPANCY() : 	\
-					        													B40C_SM10_REDUCE_CTA_OCCUPANCY())		
-													                    
-// Target threadblock occupancy for bulk scan/scatter kernel
-#define B40C_SM20_SCAN_SCATTER_CTA_OCCUPANCY()				(7)			// 7 threadblocks on GF100
-#define B40C_SM12_SCAN_SCATTER_CTA_OCCUPANCY()				(5)			// 5 threadblocks on GT200
-#define B40C_SM10_SCAN_SCATTER_CTA_OCCUPANCY()				(2)			// 2 threadblocks on G80
-#define B40C_RADIXSORT_SCAN_SCATTER_CTA_OCCUPANCY(version)	((version >= 200) ? B40C_SM20_SCAN_SCATTER_CTA_OCCUPANCY() : 	\
-			    											 (version >= 120) ? B40C_SM12_SCAN_SCATTER_CTA_OCCUPANCY() : 	\
-				    															B40C_SM10_SCAN_SCATTER_CTA_OCCUPANCY())		
-
-// Number of 256-element sets to rake per raking pass
-#define B40C_SM20_LOG_SETS_PER_PASS()					(1)			// 2 sets on GF100
-#define B40C_SM12_LOG_SETS_PER_PASS()					(0)			// 1 set on GT200
-#define B40C_SM10_LOG_SETS_PER_PASS()					(1)			// 2 sets on G80
-#define B40C_RADIXSORT_LOG_SETS_PER_PASS(version)		((version >= 200) ? B40C_SM20_LOG_SETS_PER_PASS() : 	\
-			     										 (version >= 120) ? B40C_SM12_LOG_SETS_PER_PASS() : 	\
-				    														B40C_SM10_LOG_SETS_PER_PASS())		
-
-// Number of raking passes per cycle
-#define B40C_SM20_LOG_PASSES_PER_CYCLE(K, V)					(((B40C_MAX(sizeof(K), sizeof(V)) > 4) || _B40C_LP64_) ? 0 : 1)	// 2 passes on GF100 (only one for large keys/values, or for 64-bit device pointers)
-#define B40C_SM12_LOG_PASSES_PER_CYCLE(K, V)					(B40C_MAX(sizeof(K), sizeof(V)) > 4 ? 0 : 1)					// 2 passes on GT200 (only one for large keys/values)
-#define B40C_SM10_LOG_PASSES_PER_CYCLE(K, V)					(0)																// 1 pass on G80
-#define B40C_RADIXSORT_LOG_PASSES_PER_CYCLE(version, K, V)	((version >= 200) ? B40C_SM20_LOG_PASSES_PER_CYCLE(K, V) : 	\
-				    										 (version >= 120) ? B40C_SM12_LOG_PASSES_PER_CYCLE(K, V) : 	\
-					    														B40C_SM10_LOG_PASSES_PER_CYCLE(K, V))		
-
-
-// Number of raking threads per raking pass
-#define B40C_SM20_LOG_RAKING_THREADS_PER_PASS()				(B40C_LOG_WARP_THREADS + 1)		// 2 raking warps on GF100
-#define B40C_SM12_LOG_RAKING_THREADS_PER_PASS()				(B40C_LOG_WARP_THREADS)			// 1 raking warp on GT200
-#define B40C_SM10_LOG_RAKING_THREADS_PER_PASS()				(B40C_LOG_WARP_THREADS + 2)		// 4 raking warps on G80
-#define B40C_RADIXSORT_LOG_RAKING_THREADS_PER_PASS(version)	((version >= 200) ? B40C_SM20_LOG_RAKING_THREADS_PER_PASS() : 	\
-				    										 (version >= 120) ? B40C_SM12_LOG_RAKING_THREADS_PER_PASS() : 	\
-					    														B40C_SM10_LOG_RAKING_THREADS_PER_PASS())		
-
-
-// Number of elements per cycle
-#define B40C_RADIXSORT_LOG_CYCLE_ELEMENTS(version, K, V)		(B40C_RADIXSORT_LOG_SETS_PER_PASS(version) + B40C_RADIXSORT_LOG_PASSES_PER_CYCLE(version, K, V) + B40C_RADIXSORT_LOG_THREADS + 1)
-#define B40C_RADIXSORT_CYCLE_ELEMENTS(version, K, V)			(1 << B40C_RADIXSORT_LOG_CYCLE_ELEMENTS(version, K, V))
-
-// Number of warps per CTA
-#define B40C_RADIXSORT_LOG_WARPS								(B40C_RADIXSORT_LOG_THREADS - B40C_LOG_WARP_THREADS)
-#define B40C_RADIXSORT_WARPS									(1 << B40C_RADIXSORT_LOG_WARPS)
-
-// Number of threads for spine-scanning kernel
-#define B40C_RADIXSORT_LOG_SPINE_THREADS						7		// 128 threads
-#define B40C_RADIXSORT_SPINE_THREADS							(1 << B40C_RADIXSORT_LOG_SPINE_THREADS)	
-
-// Number of elements per spine-scanning cycle
-#define B40C_RADIXSORT_LOG_SPINE_CYCLE_ELEMENTS  				9		// 512 elements
-#define B40C_RADIXSORT_SPINE_CYCLE_ELEMENTS		    			(1 << B40C_RADIXSORT_LOG_SPINE_CYCLE_ELEMENTS)
-
-
-
-/******************************************************************************
- * SRTS Control Structures
- ******************************************************************************/
-
-
-/**
- * Value-type structure denoting keys-only sorting
- */
-struct KeysOnlyType {};
-
-/**
- * Returns whether or not the templated type indicates keys-only sorting
- */
-template <typename V>
-inline __host__ __device__ bool IsKeysOnly() {return false;}
-
-
-/**
- * Returns whether or not the templated type indicates keys-only sorting
- */
-template <>
-inline __host__ __device__ bool IsKeysOnly<KeysOnlyType>() {return true;}
-
-
-/**
- * A given threadblock may receive one of three different amounts of 
- * work: "big", "normal", and "last".  The big workloads are one
- * cycle_elements greater than the normal, and the last workload 
- * does the extra (problem-size % cycle_elements) work.
- */
-struct CtaDecomposition {
-	unsigned int num_big_blocks;	// presumably the number of threadblocks given a "big" workload
-	unsigned int big_block_elements;	// presumably the element count of a "big" workload
-	unsigned int normal_block_elements;	// presumably the element count of a "normal" workload
-	unsigned int extra_elements_last_block;	// presumably the (problem-size % cycle_elements) remainder done by the last threadblock
-	unsigned int num_elements;	// presumably the total problem size in elements — TODO confirm
-};
-
-
-} // end namespace b40c_thrust
-} // end namespace detail
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cuda/detail/detail/b40c/radixsort_key_conversion.h b/compat/thrust/system/cuda/detail/detail/b40c/radixsort_key_conversion.h
deleted file mode 100644
index a170f95e6c..0000000000
--- a/compat/thrust/system/cuda/detail/detail/b40c/radixsort_key_conversion.h
+++ /dev/null
@@ -1,352 +0,0 @@
-/******************************************************************************
- * 
- * Copyright 2010 Duane Merrill
- * 
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * 
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License. 
- * 
- * 
- * 
- * 
- * AUTHORS' REQUEST: 
- * 
- * 		If you use|reference|benchmark this code, please cite our Technical 
- * 		Report (http://www.cs.virginia.edu/~dgm4d/papers/RadixSortTR.pdf):
- * 
- *		@TechReport{ Merrill:Sorting:2010,
- *        	author = "Duane Merrill and Andrew Grimshaw",
- *        	title = "Revisiting Sorting for GPGPU Stream Architectures",
- *        	year = "2010",
- *        	institution = "University of Virginia, Department of Computer Science",
- *        	address = "Charlottesville, VA, USA",
- *        	number = "CS2010-03"
- *		}
- * 
- * For more information, see our Google Code project site: 
- * http://code.google.com/p/back40computing/
- * 
- * Thanks!
- * 
- ******************************************************************************/
-
-
-/******************************************************************************
- * Functors for converting signed and floating point types to unsigned types
- * suitable for radix sorting  
- ******************************************************************************/
-
-#pragma once
-
-namespace thrust  {
-namespace system  {
-namespace cuda    {
-namespace detail  {
-namespace detail  {
-namespace b40c_thrust   {
-
-
-//
-// Do-nothing functor: operator() leaves the key untouched and MustApply() reports false
-//
-
-template <typename T>
-struct NopFunctor{
-    template <typename ConvertedKeyType>
-	__device__ __host__ __forceinline__ void operator()(ConvertedKeyType &converted_key) {}
-	__device__ __host__ __forceinline__ static bool MustApply(){ return false;}
-};
-
-//
-// Do-nothing functor that indicates a mandatory pass: the key is left untouched, but MustApply() reports true
-//
-
-template <typename T>
-struct MandatoryPassNopFunctor{
-    template <typename ConvertedKeyType>
-	__device__ __host__ __forceinline__ void operator()(ConvertedKeyType &converted_key) {}
-	__device__ __host__ __forceinline__ static bool MustApply(){ return true;}	// must be true: returning false made this functor identical to NopFunctor
-};
-
-
-//
-// Conversion for generic unsigned types
-//
-
-template <typename T> struct KeyConversion {
-	typedef T UnsignedBits;
-};
-
-template <typename T>
-struct PreprocessKeyFunctor{	// unspecialized case: the key is left unchanged and MustApply() is false
-    template <typename ConvertedKeyType>
-	__device__ __host__ __forceinline__ void operator()(ConvertedKeyType &converted_key) {}
-	__device__ __host__ __forceinline__ static bool MustApply(){ return false;}
-};
-
-template <typename T>
-struct PostprocessKeyFunctor {	// unspecialized case: the key is left unchanged and MustApply() is false
-    template <typename ConvertedKeyType>
-	__device__ __host__ __forceinline__ void operator()(ConvertedKeyType &converted_key) {}
-	__device__ __host__ __forceinline__ static bool MustApply(){ return false;}
-};
-
-
-
-//
-// Conversion for floats
-//
-
-template <> struct KeyConversion<float> {
-	typedef unsigned int UnsignedBits;
-};
-
-template <>
-struct PreprocessKeyFunctor<float> {
-	__device__ __host__ __forceinline__ void operator()(unsigned int &converted_key) {
-		// Sign bit set: invert every bit. Sign bit clear: flip only the sign bit.
-		unsigned int mask = (converted_key & 0x80000000) ? 0xffffffff : 0x80000000; 
-		converted_key ^= mask;
-	}
-	__device__ __host__ __forceinline__ static bool MustApply(){ return true;}
-};
-
-template <>
-struct PostprocessKeyFunctor<float> {
-	__device__ __host__ __forceinline__ void operator()(unsigned int &converted_key) {
-		// Undoes the float preprocess step: sign bit set -> flip only the sign bit; otherwise invert every bit.
-		unsigned int mask = (converted_key & 0x80000000) ? 0x80000000 : 0xffffffff; 
-		converted_key ^= mask;
-    }
-	__device__ __host__ __forceinline__ static bool MustApply(){ return true;}
-};
-
-
-
-//
-// Conversion for doubles
-//
-
-template <> struct KeyConversion<double> {
-	typedef unsigned long long UnsignedBits;
-};
-
-template <>
-struct PreprocessKeyFunctor<double> {
-	__device__ __host__ __forceinline__ void operator()(unsigned long long &converted_key) {
-		// Sign bit (bit 63) set: invert every bit. Sign bit clear: flip only the sign bit.
-		unsigned long long mask = (converted_key & 0x8000000000000000) ? 0xffffffffffffffff : 0x8000000000000000; 
-		converted_key ^= mask;
-	}
-	__device__ __host__ __forceinline__ static bool MustApply(){ return true;}
-};
-
-template <>
-struct PostprocessKeyFunctor<double> {
-	__device__ __host__ __forceinline__ void operator()(unsigned long long &converted_key)  {
-		unsigned long long mask = (converted_key & 0x8000000000000000) ? 0x8000000000000000 : 0xffffffffffffffff; 	// undoes the preprocess step: sign bit set -> flip only the sign bit; otherwise invert every bit
-        converted_key ^= mask;
-    }
-	__device__ __host__ __forceinline__ static bool MustApply(){ return true;}
-};
-
-
-//
-// Conversion for signed chars
-//
-
-template <> struct KeyConversion<char> {
-  typedef unsigned char UnsignedBits;
-};
-
-template <>
-struct PreprocessKeyFunctor<char> {
-  __device__ __host__ __forceinline__ void operator()(unsigned char &converted_key) {
-    // char is unsigned on some platforms, so we have to check
-    if(std::numeric_limits<char>::is_signed)	// NOTE(review): needs <limits>, which this header does not include itself
-    {
-      const unsigned int SIGN_MASK = 1u << ((sizeof(char) * 8) - 1);	// most significant bit of a char-sized key
-      converted_key ^= SIGN_MASK;	
-    }
-  }
-  __device__ __host__ __forceinline__ static bool MustApply(){ return std::numeric_limits<char>::is_signed;}
-};
-
-template <>
-struct PostprocessKeyFunctor<char> {
-  __device__ __host__ __forceinline__ void operator()(unsigned char &converted_key)  {
-    // char is unsigned on some platforms, so we have to check
-    if(std::numeric_limits<char>::is_signed)	// NOTE(review): needs <limits>, which this header does not include itself
-    {
-      const unsigned int SIGN_MASK = 1u << ((sizeof(char) * 8) - 1);	// most significant bit of a char-sized key
-      converted_key ^= SIGN_MASK;	
-    }
-  }
-  __device__ __host__ __forceinline__ static bool MustApply(){ return std::numeric_limits<char>::is_signed;}
-};
-
-
-// TODO handle this more gracefully
-template <> struct KeyConversion<signed char> {
-	typedef unsigned char UnsignedBits;
-};
-
-template <>
-struct PreprocessKeyFunctor<signed char> {
-	__device__ __host__ __forceinline__ void operator()(unsigned char &converted_key) {
-		const unsigned int SIGN_MASK = 1u << ((sizeof(char) * 8) - 1);	// most significant bit of a char-sized key
-		converted_key ^= SIGN_MASK;	// toggle the sign bit
-	}
-	__device__ __host__ __forceinline__ static bool MustApply(){ return true;}
-};
-
-template <>
-struct PostprocessKeyFunctor<signed char> {
-	__device__ __host__ __forceinline__ void operator()(unsigned char &converted_key)  {
-		const unsigned int SIGN_MASK = 1u << ((sizeof(char) * 8) - 1);	// most significant bit of a char-sized key
-		converted_key ^= SIGN_MASK;	// toggle the sign bit (XOR with the same mask is its own inverse)
-    }
-	__device__ __host__ __forceinline__ static bool MustApply(){ return true;}
-};
-
-
-//
-// Conversion for signed shorts
-//
-
-template <> struct KeyConversion<short> {
-	typedef unsigned short UnsignedBits;
-};
-
-template <>
-struct PreprocessKeyFunctor<short> {
-	__device__ __host__ __forceinline__ void operator()(unsigned short &converted_key) {
-		const unsigned int SIGN_MASK = 1u << ((sizeof(short) * 8) - 1);	// most significant bit of a short-sized key
-		converted_key ^= SIGN_MASK;	// toggle the sign bit
-	}
-	__device__ __host__ __forceinline__ static bool MustApply(){ return true;}
-};
-
-template <>
-struct PostprocessKeyFunctor<short> {
-	__device__ __host__ __forceinline__ void operator()(unsigned short &converted_key)  {
-		const unsigned int SIGN_MASK = 1u << ((sizeof(short) * 8) - 1);	// most significant bit of a short-sized key
-		converted_key ^= SIGN_MASK;	// toggle the sign bit (XOR with the same mask is its own inverse)
-    }
-	__device__ __host__ __forceinline__ static bool MustApply(){ return true;}
-};
-
-
-
-//
-// Conversion for signed ints
-//
-
-template <> struct KeyConversion<int> {
-	typedef unsigned int UnsignedBits;
-};
-
-template <>
-struct PreprocessKeyFunctor<int> {
-	__device__ __host__ __forceinline__ void operator()(unsigned int &converted_key) {
-		const unsigned int SIGN_MASK = 1u << ((sizeof(int) * 8) - 1);	// most significant bit of an int-sized key
-		converted_key ^= SIGN_MASK;	// toggle the sign bit
-	}
-	__device__ __host__ __forceinline__ static bool MustApply(){ return true;}
-};
-
-template <>
-struct PostprocessKeyFunctor<int> {
-	__device__ __host__ __forceinline__ void operator()(unsigned int &converted_key)  {
-		const unsigned int SIGN_MASK = 1u << ((sizeof(int) * 8) - 1);	// most significant bit of an int-sized key
-		converted_key ^= SIGN_MASK;	// toggle the sign bit (XOR with the same mask is its own inverse)
-    }
-	__device__ __host__ __forceinline__ static bool MustApply(){ return true;}
-};
-
-
-
-//
-// Conversion for signed longs
-//
-
-// TODO rework this with metaprogramming. NOTE(review): ULONG_MAX/UINT_MAX come from <climits>, which this header does not include itself; if both are undefined the #if compares 0 == 0 and selects the 32-bit branch.
-template <> struct KeyConversion<unsigned long> {
-#if ULONG_MAX == UINT_MAX
-    typedef unsigned int UnsignedBits;
-#else
-    typedef unsigned long long UnsignedBits;
-#endif
-};
-
-// TODO rework this with metaprogramming. NOTE(review): ULONG_MAX/UINT_MAX come from <climits>, which this header does not include itself; if both are undefined the #if compares 0 == 0 and selects the 32-bit branch.
-template <> struct KeyConversion<long> {
-#if ULONG_MAX == UINT_MAX
-    typedef unsigned int UnsignedBits;
-#else
-    typedef unsigned long long UnsignedBits;
-#endif
-};
-
-template <>
-struct PreprocessKeyFunctor<long> {
-	__device__ __host__ __forceinline__ void operator()(typename KeyConversion<long>::UnsignedBits& converted_key) {
-		const typename KeyConversion<long>::UnsignedBits SIGN_MASK = 1ul << ((sizeof(long) * 8) - 1);	// most significant bit of a long-sized key
-		converted_key ^= SIGN_MASK;	// toggle the sign bit
-	}
-	__device__ __host__ __forceinline__ static bool MustApply(){ return true;}
-};
-
-template <>
-struct PostprocessKeyFunctor<long> {
-	__device__ __host__ __forceinline__ void operator()(typename KeyConversion<long>::UnsignedBits& converted_key) {
-		const typename KeyConversion<long>::UnsignedBits SIGN_MASK = 1ul << ((sizeof(long) * 8) - 1);	// most significant bit of a long-sized key
-		converted_key ^= SIGN_MASK;	// toggle the sign bit (XOR with the same mask is its own inverse)
-    }
-	__device__ __host__ __forceinline__ static bool MustApply(){ return true;}
-};
-
-
-
-//
-// Conversion for signed long longs 
-//
-
-template <> struct KeyConversion<long long> {
-	typedef unsigned long long UnsignedBits;
-};
-
-template <>
-struct PreprocessKeyFunctor<long long> {
-	__device__ __host__ __forceinline__ void operator()(unsigned long long &converted_key) {
-		const unsigned long long SIGN_MASK = 1ull << ((sizeof(long long) * 8) - 1);	// most significant bit of a long-long-sized key
-		converted_key ^= SIGN_MASK;	// toggle the sign bit
-	}
-	__device__ __host__ __forceinline__ static bool MustApply(){ return true;}
-};
-
-template <>
-struct PostprocessKeyFunctor<long long> {
-	__device__ __host__ __forceinline__ void operator()(unsigned long long &converted_key)  {
-		const unsigned long long SIGN_MASK = 1ull << ((sizeof(long long) * 8) - 1);	// most significant bit of a long-long-sized key
-		converted_key ^= SIGN_MASK;	// toggle the sign bit (XOR with the same mask is its own inverse)
-    }
-	__device__ __host__ __forceinline__ static bool MustApply(){ return true;}
-};
-
-
-} // end namespace b40c_thrust
-} // end namespace detail
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cuda/detail/detail/b40c/radixsort_reduction_kernel.h b/compat/thrust/system/cuda/detail/detail/b40c/radixsort_reduction_kernel.h
deleted file mode 100644
index a8f91d3d24..0000000000
--- a/compat/thrust/system/cuda/detail/detail/b40c/radixsort_reduction_kernel.h
+++ /dev/null
@@ -1,439 +0,0 @@
-/******************************************************************************
- * 
- * Copyright 2010 Duane Merrill
- * 
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * 
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License. 
- * 
- * 
- * 
- * 
- * AUTHORS' REQUEST: 
- * 
- * 		If you use|reference|benchmark this code, please cite our Technical 
- * 		Report (http://www.cs.virginia.edu/~dgm4d/papers/RadixSortTR.pdf):
- * 
- *		@TechReport{ Merrill:Sorting:2010,
- *        	author = "Duane Merrill and Andrew Grimshaw",
- *        	title = "Revisiting Sorting for GPGPU Stream Architectures",
- *        	year = "2010",
- *        	institution = "University of Virginia, Department of Computer Science",
- *        	address = "Charlottesville, VA, USA",
- *        	number = "CS2010-03"
- *		}
- * 
- * For more information, see our Google Code project site: 
- * http://code.google.com/p/back40computing/
- * 
- * Thanks!
- * 
- ******************************************************************************/
-
-
-/******************************************************************************
- * Bottom-level digit-reduction/counting kernel
- ******************************************************************************/
-
-#pragma once
-
-#include "radixsort_kernel_common.h"
-
-namespace thrust  {
-namespace system  {
-namespace cuda    {
-namespace detail  {
-namespace detail  {
-namespace b40c_thrust   {
-
-/******************************************************************************
- * Defines
- ******************************************************************************/
-
-const int BYTE_ENCODE_SHIFT = 0x3;
-
-
-/******************************************************************************
- * Cycle-processing Routines
- ******************************************************************************/
-
-__device__ __forceinline__ int DecodeInt(int encoded, int quad_byte){
-	return (encoded >> quad_byte) & 0xff;		// shift right 8 bits per digit and return rightmost 8 bits
-}
-
-
-__device__ __forceinline__ int EncodeInt(int count, int quad_byte) {
-	return count << quad_byte;					// shift left 8 bits per digit
-}
-
-
-template <typename K, long long RADIX_DIGITS, int BIT>
-__device__ __forceinline__ void DecodeDigit(
-	K key, 
-	int &lane, 
-	int &quad_shift) 
-{
-	const K DIGIT_MASK = RADIX_DIGITS - 1;
-	lane = (key & (DIGIT_MASK << BIT)) >> (BIT + 2);
-	
-	const K QUAD_MASK = (RADIX_DIGITS < 4) ? 0x1 : 0x3;
-	if (BIT == 32) {
-		// N.B.: This takes one more instruction than the code below it, but 
-		// otherwise the compiler goes nuts and shoves hundreds of bytes 
-		// to lmem when bit = 32 on 64-bit keys.		
-		quad_shift = ((key >> BIT) & QUAD_MASK) << BYTE_ENCODE_SHIFT;	
-	} else {
-		quad_shift = MagnitudeShift<K, BYTE_ENCODE_SHIFT - BIT>(key & (QUAD_MASK << BIT));
-	}
-}
-
-
-template <int RADIX_DIGITS, int SCAN_LANES, int LANES_PER_WARP, int BIT, bool FINAL_REDUCE>
-__device__ __forceinline__ void ReduceEncodedCounts(
-	int local_counts[LANES_PER_WARP][4],
-	int encoded_carry[SCAN_LANES][B40C_RADIXSORT_THREADS]) 
-{
-	const int LOG_PARTIALS_PER_THREAD = B40C_RADIXSORT_LOG_THREADS - B40C_LOG_WARP_THREADS;
-	const int PARTIALS_PER_THREAD = 1 << LOG_PARTIALS_PER_THREAD;
-	
-	int encoded;
-	int idx = threadIdx.x & (B40C_WARP_THREADS - 1);
-	
-	
-	__syncthreads();
-
-	#pragma unroll
-	for (int j = 0; j < (int) LANES_PER_WARP; j++) {
-		
-		int warp_id = (threadIdx.x >> B40C_LOG_WARP_THREADS) + (j * B40C_RADIXSORT_WARPS);
-		if (warp_id < SCAN_LANES) {
-
-			// rest of my elements
-			#pragma unroll
-			for (int i = 0; i < (int) PARTIALS_PER_THREAD; i++) {
-				encoded = encoded_carry[warp_id][idx + (i * B40C_WARP_THREADS)];		
-				local_counts[j][0] += DecodeInt(encoded, 0u << BYTE_ENCODE_SHIFT);
-				local_counts[j][1] += DecodeInt(encoded, 1u << BYTE_ENCODE_SHIFT);
-				local_counts[j][2] += DecodeInt(encoded, 2u << BYTE_ENCODE_SHIFT);
-				local_counts[j][3] += DecodeInt(encoded, 3u << BYTE_ENCODE_SHIFT);
-			}
-			
-			if (FINAL_REDUCE) {
-				// reduce all four packed fields, leaving them in the first four elements of our row
-				WarpReduce<B40C_WARP_THREADS>(idx, &encoded_carry[warp_id][0], local_counts[j][0]);
-				WarpReduce<B40C_WARP_THREADS>(idx, &encoded_carry[warp_id][1], local_counts[j][1]);
-				WarpReduce<B40C_WARP_THREADS>(idx, &encoded_carry[warp_id][2], local_counts[j][2]);
-				WarpReduce<B40C_WARP_THREADS>(idx, &encoded_carry[warp_id][3], local_counts[j][3]);
-			}
-		}
-	}	
-
-	__syncthreads();
-	
-}
-	
-
-template <typename K, int RADIX_DIGITS, int SCAN_LANES, int BIT, typename PreprocessFunctor>
-__device__ __forceinline__ void Bucket(
-	K input, 
-	int encoded_carry[SCAN_LANES][B40C_RADIXSORT_THREADS],
-	PreprocessFunctor preprocess = PreprocessFunctor()) 
-{
-	int lane, quad_shift;
-	preprocess(input);
-	DecodeDigit<K, RADIX_DIGITS, BIT>(input, lane, quad_shift);
-	encoded_carry[lane][threadIdx.x] += EncodeInt(1, quad_shift);
-}
-
-
-template <typename K, int RADIX_DIGITS, int SCAN_LANES, int BIT, typename PreprocessFunctor, int CYCLES>
-struct LoadOp;
-
-template <typename K, int RADIX_DIGITS, int SCAN_LANES, int BIT, typename PreprocessFunctor>
-struct LoadOp<K, RADIX_DIGITS, SCAN_LANES, BIT, PreprocessFunctor, 1> 
-{
-	static __device__ __forceinline__  void BlockOfLoads(K *d_in_keys, int offset, int encoded_carry[SCAN_LANES][B40C_RADIXSORT_THREADS])
-	{
-		K key = d_in_keys[offset + threadIdx.x];
-		Bucket<K, RADIX_DIGITS, SCAN_LANES, BIT, PreprocessFunctor>(key, encoded_carry);
-	}
-};
-
-template <typename K, int RADIX_DIGITS, int SCAN_LANES, int BIT, typename PreprocessFunctor>
-struct LoadOp<K, RADIX_DIGITS, SCAN_LANES, BIT, PreprocessFunctor, 2> 
-{
-	static __device__ __forceinline__  void BlockOfLoads(K *d_in_keys, int offset, int encoded_carry[SCAN_LANES][B40C_RADIXSORT_THREADS])
-	{
-		LoadOp<K, RADIX_DIGITS, SCAN_LANES, BIT, PreprocessFunctor, 1>::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 0), encoded_carry);
-		LoadOp<K, RADIX_DIGITS, SCAN_LANES, BIT, PreprocessFunctor, 1>::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 1), encoded_carry);
-	}
-};
-
-template <typename K, int RADIX_DIGITS, int SCAN_LANES, int BIT, typename PreprocessFunctor>
-struct LoadOp<K, RADIX_DIGITS, SCAN_LANES, BIT, PreprocessFunctor, 4> 
-{
-	static __device__ __forceinline__  void BlockOfLoads(K *d_in_keys, int offset, int encoded_carry[SCAN_LANES][B40C_RADIXSORT_THREADS])
-	{
-		LoadOp<K, RADIX_DIGITS, SCAN_LANES, BIT, PreprocessFunctor, 2>::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 0), encoded_carry);
-		LoadOp<K, RADIX_DIGITS, SCAN_LANES, BIT, PreprocessFunctor, 2>::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 2), encoded_carry);
-	}
-};
-
-template <typename K, int RADIX_DIGITS, int SCAN_LANES, int BIT, typename PreprocessFunctor>
-struct LoadOp<K, RADIX_DIGITS, SCAN_LANES, BIT, PreprocessFunctor, 8> 
-{
-	static __device__ __forceinline__  void BlockOfLoads(K *d_in_keys, int offset, int encoded_carry[SCAN_LANES][B40C_RADIXSORT_THREADS])
-	{
-			K keys[8];
-				
-			keys[0] = d_in_keys[offset + (B40C_RADIXSORT_THREADS * 0) + threadIdx.x];
-			keys[1] = d_in_keys[offset + (B40C_RADIXSORT_THREADS * 1) + threadIdx.x];
-			keys[2] = d_in_keys[offset + (B40C_RADIXSORT_THREADS * 2) + threadIdx.x];
-			keys[3] = d_in_keys[offset + (B40C_RADIXSORT_THREADS * 3) + threadIdx.x];
-
-			if (B40C_FERMI(__CUDA_ARCH__)) __syncthreads();
-			
-			keys[4] = d_in_keys[offset + (B40C_RADIXSORT_THREADS * 4) + threadIdx.x];
-			keys[5] = d_in_keys[offset + (B40C_RADIXSORT_THREADS * 5) + threadIdx.x];
-			keys[6] = d_in_keys[offset + (B40C_RADIXSORT_THREADS * 6) + threadIdx.x];
-			keys[7] = d_in_keys[offset + (B40C_RADIXSORT_THREADS * 7) + threadIdx.x];
-			
-			Bucket<K, RADIX_DIGITS, SCAN_LANES, BIT, PreprocessFunctor>(keys[0], encoded_carry);
-			Bucket<K, RADIX_DIGITS, SCAN_LANES, BIT, PreprocessFunctor>(keys[1], encoded_carry);
-			Bucket<K, RADIX_DIGITS, SCAN_LANES, BIT, PreprocessFunctor>(keys[2], encoded_carry);
-			Bucket<K, RADIX_DIGITS, SCAN_LANES, BIT, PreprocessFunctor>(keys[3], encoded_carry);
-			Bucket<K, RADIX_DIGITS, SCAN_LANES, BIT, PreprocessFunctor>(keys[4], encoded_carry);
-			Bucket<K, RADIX_DIGITS, SCAN_LANES, BIT, PreprocessFunctor>(keys[5], encoded_carry);
-			Bucket<K, RADIX_DIGITS, SCAN_LANES, BIT, PreprocessFunctor>(keys[6], encoded_carry);
-			Bucket<K, RADIX_DIGITS, SCAN_LANES, BIT, PreprocessFunctor>(keys[7], encoded_carry);
-	}
-};
-
-template <typename K, int RADIX_DIGITS, int SCAN_LANES, int BIT, typename PreprocessFunctor>
-struct LoadOp<K, RADIX_DIGITS, SCAN_LANES, BIT, PreprocessFunctor, 16> {
-
-	static __device__ __forceinline__  void BlockOfLoads(K *d_in_keys, int offset, int encoded_carry[SCAN_LANES][B40C_RADIXSORT_THREADS])
-	{
-		LoadOp<K, RADIX_DIGITS, SCAN_LANES, BIT, PreprocessFunctor, 8>::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 0), encoded_carry);
-		LoadOp<K, RADIX_DIGITS, SCAN_LANES, BIT, PreprocessFunctor, 8>::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 8), encoded_carry);
-	}
-};
-
-template <typename K, int RADIX_DIGITS, int SCAN_LANES, int BIT, typename PreprocessFunctor>
-struct LoadOp<K, RADIX_DIGITS, SCAN_LANES, BIT, PreprocessFunctor, 32> {
-
-	static __device__ __forceinline__  void BlockOfLoads(K *d_in_keys, int offset, int encoded_carry[SCAN_LANES][B40C_RADIXSORT_THREADS])
-	{
-		LoadOp<K, RADIX_DIGITS, SCAN_LANES, BIT, PreprocessFunctor, 16>::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 0), encoded_carry);
-		LoadOp<K, RADIX_DIGITS, SCAN_LANES, BIT, PreprocessFunctor, 16>::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 16), encoded_carry);
-	}
-};
-
-template <typename K, int RADIX_DIGITS, int SCAN_LANES, int BIT, typename PreprocessFunctor>
-struct LoadOp<K, RADIX_DIGITS, SCAN_LANES, BIT, PreprocessFunctor, 64> {
-
-	static __device__ __forceinline__  void BlockOfLoads(K *d_in_keys, int offset, int encoded_carry[SCAN_LANES][B40C_RADIXSORT_THREADS])
-	{
-		LoadOp<K, RADIX_DIGITS, SCAN_LANES, BIT, PreprocessFunctor, 32>::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 0), encoded_carry);
-		LoadOp<K, RADIX_DIGITS, SCAN_LANES, BIT, PreprocessFunctor, 32>::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 32), encoded_carry);
-	}
-};
-
-template <typename K, int RADIX_DIGITS, int SCAN_LANES, int BIT, typename PreprocessFunctor>
-struct LoadOp<K, RADIX_DIGITS, SCAN_LANES, BIT, PreprocessFunctor, 128> {
-
-	static __device__ __forceinline__  void BlockOfLoads(K *d_in_keys, int offset, int encoded_carry[SCAN_LANES][B40C_RADIXSORT_THREADS])
-	{
-		LoadOp<K, RADIX_DIGITS, SCAN_LANES, BIT, PreprocessFunctor, 64>::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 0), encoded_carry);
-		LoadOp<K, RADIX_DIGITS, SCAN_LANES, BIT, PreprocessFunctor, 64>::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 64), encoded_carry);
-	}
-};
-
-template <typename K, int RADIX_DIGITS, int SCAN_LANES, int BIT, typename PreprocessFunctor>
-struct LoadOp<K, RADIX_DIGITS, SCAN_LANES, BIT, PreprocessFunctor, 252> {
-
-	static __device__ __forceinline__  void BlockOfLoads(K *d_in_keys, int offset, int encoded_carry[SCAN_LANES][B40C_RADIXSORT_THREADS])
-	{
-		LoadOp<K, RADIX_DIGITS, SCAN_LANES, BIT, PreprocessFunctor, 128>::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 0), encoded_carry);
-		LoadOp<K, RADIX_DIGITS, SCAN_LANES, BIT, PreprocessFunctor, 64>::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 128), encoded_carry);
-		LoadOp<K, RADIX_DIGITS, SCAN_LANES, BIT, PreprocessFunctor, 32>::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 192), encoded_carry);
-		LoadOp<K, RADIX_DIGITS, SCAN_LANES, BIT, PreprocessFunctor, 16>::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 224), encoded_carry);
-		LoadOp<K, RADIX_DIGITS, SCAN_LANES, BIT, PreprocessFunctor, 8>::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 240), encoded_carry);
-		LoadOp<K, RADIX_DIGITS, SCAN_LANES, BIT, PreprocessFunctor, 4>::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 248), encoded_carry);
-	}
-};
-
-
-template <int SCAN_LANES>
-__device__ __forceinline__ void ResetEncodedCarry(
-	int encoded_carry[SCAN_LANES][B40C_RADIXSORT_THREADS])
-{
-	#pragma unroll
-	for (int SCAN_LANE = 0; SCAN_LANE < (int) SCAN_LANES; SCAN_LANE++) {
-		encoded_carry[SCAN_LANE][threadIdx.x] = 0;
-	}
-}
-
-
-template <typename K, int RADIX_DIGITS, int SCAN_LANES, int LANES_PER_WARP, int BIT, typename PreprocessFunctor>
-__device__ __forceinline__ int ProcessLoads(
-	K *d_in_keys,
-	int loads,
-	int &offset,
-	int encoded_carry[SCAN_LANES][B40C_RADIXSORT_THREADS],
-	int local_counts[LANES_PER_WARP][4])
-{
-	// Unroll batches of loads with occasional reduction to avoid overflow
-	while (loads >= 32) {
-	
-		LoadOp<K, RADIX_DIGITS, SCAN_LANES, BIT, PreprocessFunctor, 32>::BlockOfLoads(d_in_keys, offset, encoded_carry);
-		offset += B40C_RADIXSORT_THREADS * 32;
-		loads -= 32;
-
-		// Reduce int local count registers to prevent overflow
-		ReduceEncodedCounts<RADIX_DIGITS, SCAN_LANES, LANES_PER_WARP, BIT, false>(
-				local_counts, 
-				encoded_carry);
-		
-		// Reset encoded counters
-		ResetEncodedCarry<SCAN_LANES>(encoded_carry);
-	} 
-	
-	int retval = loads;
-	
-	// Wind down loads in decreasing batch sizes
-
-	while (loads >= 4) {
-		LoadOp<K, RADIX_DIGITS, SCAN_LANES, BIT, PreprocessFunctor, 4>::BlockOfLoads(d_in_keys, offset, encoded_carry);
-		offset += B40C_RADIXSORT_THREADS * 4;
-		loads -= 4;
-	} 
-
-	while (loads) {
-		LoadOp<K, RADIX_DIGITS, SCAN_LANES, BIT, PreprocessFunctor, 1>::BlockOfLoads(d_in_keys, offset, encoded_carry);
-		offset += B40C_RADIXSORT_THREADS * 1;
-		loads--;
-	}
-	
-	return retval;
-}
-
-
-/******************************************************************************
- * Reduction/counting Kernel Entry Point
- ******************************************************************************/
-
-template <typename K, typename V, int PASS, int RADIX_BITS, int BIT, typename PreprocessFunctor>
-__launch_bounds__ (B40C_RADIXSORT_THREADS, B40C_RADIXSORT_REDUCE_CTA_OCCUPANCY(__CUDA_ARCH__))
-__global__ 
-void RakingReduction(
-	bool *d_from_alt_storage,
-	int *d_spine,
-	K *d_in_keys,
-	K *d_out_keys,
-	CtaDecomposition work_decomposition)
-{
-	const int RADIX_DIGITS 		= 1 << RADIX_BITS;
-
-	const int LOG_SCAN_LANES 		= (RADIX_BITS >= 2) ? RADIX_BITS - 2 : 0;	// Always at least one fours group
-	const int SCAN_LANES 			= 1 << LOG_SCAN_LANES;
-
-	const int LOG_LANES_PER_WARP 	= (SCAN_LANES > B40C_RADIXSORT_WARPS) ? LOG_SCAN_LANES - B40C_RADIXSORT_LOG_WARPS : 0;	// Always at least one fours group per warp
-	const int LANES_PER_WARP 		= 1 << LOG_LANES_PER_WARP;
-	
-	
-	// Each thread gets its own column of fours-groups (for conflict-free updates)
-	__shared__ int encoded_carry[SCAN_LANES][B40C_RADIXSORT_THREADS];			
-
-	// Each thread is also responsible for aggregating an unencoded segment of a fours-group
-	int local_counts[LANES_PER_WARP][4];								
-
-	// Determine where to read our input
-	bool from_alt_storage = (PASS == 0) ? false : d_from_alt_storage[PASS & 0x1];
-	if (from_alt_storage) d_in_keys = d_out_keys;
-	
-	// Calculate our threadblock's range
-	int offset, block_elements;
-	if (blockIdx.x < work_decomposition.num_big_blocks) {
-		offset = work_decomposition.big_block_elements * blockIdx.x;
-		block_elements = work_decomposition.big_block_elements;
-	} else {
-		offset = (work_decomposition.normal_block_elements * blockIdx.x) + (work_decomposition.num_big_blocks * B40C_RADIXSORT_CYCLE_ELEMENTS(__CUDA_ARCH__, K, V));
-		block_elements = work_decomposition.normal_block_elements;
-	}
-	
-	// Initialize local counts
-	#pragma unroll 
-	for (int LANE = 0; LANE < (int) LANES_PER_WARP; LANE++) {
-		local_counts[LANE][0] = 0;
-		local_counts[LANE][1] = 0;
-		local_counts[LANE][2] = 0;
-		local_counts[LANE][3] = 0;
-	}
-	
-	// Reset encoded counters
-	ResetEncodedCarry<SCAN_LANES>(encoded_carry);
-	
-	// Process loads
-	int loads = block_elements >> B40C_RADIXSORT_LOG_THREADS;
-	int unreduced_loads = ProcessLoads<K, RADIX_DIGITS, SCAN_LANES, LANES_PER_WARP, BIT, PreprocessFunctor>(
-		d_in_keys,
-		loads,
-		offset,
-		encoded_carry,
-		local_counts);
-	
-	// Cleanup if we're the last block  
-	if ((blockIdx.x == gridDim.x - 1) && (work_decomposition.extra_elements_last_block)) {
-
-		const int LOADS_PER_CYCLE = B40C_RADIXSORT_CYCLE_ELEMENTS(__CUDA_ARCH__, K, V) / B40C_RADIXSORT_THREADS;
-		
-		// If extra guarded loads may cause overflow, reduce now and reset counters
-		if (unreduced_loads + LOADS_PER_CYCLE > 255) {
-		
-			ReduceEncodedCounts<RADIX_DIGITS, SCAN_LANES, LANES_PER_WARP, BIT, false>(
-					local_counts, 
-					encoded_carry);
-			
-			ResetEncodedCarry<SCAN_LANES>(encoded_carry);
-		}
-		
-		// perform up to LOADS_PER_CYCLE extra guarded loads
-		#pragma unroll
-		for (int EXTRA_LOAD = 0; EXTRA_LOAD < (int) LOADS_PER_CYCLE; EXTRA_LOAD++) {
-			if (threadIdx.x + (B40C_RADIXSORT_THREADS * EXTRA_LOAD) < work_decomposition.extra_elements_last_block) {
-				K key = d_in_keys[offset + (B40C_RADIXSORT_THREADS * EXTRA_LOAD) + threadIdx.x];
-				Bucket<K, RADIX_DIGITS, SCAN_LANES, BIT, PreprocessFunctor>(key, encoded_carry);
-			}
-		}
-	}
-	
-	// Aggregate 
-	ReduceEncodedCounts<RADIX_DIGITS, SCAN_LANES, LANES_PER_WARP, BIT, true>(
-		local_counts, 
-		encoded_carry);
-
-	// Write carry in parallel (carries per row are in the first four bytes of each row) 
-	if (threadIdx.x < RADIX_DIGITS) {
-
-		int row = threadIdx.x >> 2;		
-		int col = threadIdx.x & 3;			 
-		d_spine[(gridDim.x * threadIdx.x) + blockIdx.x] = encoded_carry[row][col];
-	}
-} 
-
-} // end namespace b40c_thrust
-} // end namespace detail
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cuda/detail/detail/b40c/radixsort_scanscatter_kernel.h b/compat/thrust/system/cuda/detail/detail/b40c/radixsort_scanscatter_kernel.h
deleted file mode 100644
index 1377999c76..0000000000
--- a/compat/thrust/system/cuda/detail/detail/b40c/radixsort_scanscatter_kernel.h
+++ /dev/null
@@ -1,1207 +0,0 @@
-/******************************************************************************
- * 
- * Copyright 2010 Duane Merrill
- * 
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * 
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License. 
- * 
- * 
- * 
- * 
- * AUTHORS' REQUEST: 
- * 
- * 		If you use|reference|benchmark this code, please cite our Technical 
- * 		Report (http://www.cs.virginia.edu/~dgm4d/papers/RadixSortTR.pdf):
- * 
- *		@TechReport{ Merrill:Sorting:2010,
- *        	author = "Duane Merrill and Andrew Grimshaw",
- *        	title = "Revisiting Sorting for GPGPU Stream Architectures",
- *        	year = "2010",
- *        	institution = "University of Virginia, Department of Computer Science",
- *        	address = "Charlottesville, VA, USA",
- *        	number = "CS2010-03"
- *		}
- * 
- * For more information, see our Google Code project site: 
- * http://code.google.com/p/back40computing/
- * 
- * Thanks!
- * 
- ******************************************************************************/
-
-
-/******************************************************************************
-// Bottom-level digit scanning/scattering kernel
- ******************************************************************************/
-
-#pragma once
-
-#include "radixsort_kernel_common.h"
-
-namespace thrust  {
-namespace system  {
-namespace cuda    {
-namespace detail  {
-namespace detail  {
-namespace b40c_thrust   {
-
-/******************************************************************************
- * Appropriate substitutes to use for out-of-bounds key (and value) offsets 
- ******************************************************************************/
-
-template <typename T> 
-__device__ __forceinline__ T DefaultextraValue() {
-	return T();
-}
-
-template <> 
-__device__ __forceinline__ unsigned char DefaultextraValue<unsigned char>() {
-	return (unsigned char) -1;
-}
-
-template <> 
-__device__ __forceinline__ unsigned short DefaultextraValue<unsigned short>() {
-	return (unsigned short) -1;
-}
-
-template <> 
-__device__ __forceinline__ unsigned int DefaultextraValue<unsigned int>() {
-	return (unsigned int) -1u;
-}
-
-template <> 
-__device__ __forceinline__ unsigned long DefaultextraValue<unsigned long>() {
-	return (unsigned long) -1ul;
-}
-
-template <> 
-__device__ __forceinline__ unsigned long long DefaultextraValue<unsigned long long>() {
-	return (unsigned long long) -1ull;
-}
-
-
-/******************************************************************************
- * Cycle-processing Routines
- ******************************************************************************/
-
-template <typename K, long long RADIX_DIGITS, int BIT>
-__device__ __forceinline__ int DecodeDigit(K key) 
-{
-	const K DIGIT_MASK = RADIX_DIGITS - 1;
-	return (key >> BIT) & DIGIT_MASK;
-}
-
-
-template <typename K, long long RADIX_DIGITS, int BIT, int PADDED_PARTIALS_PER_LANE>
-__device__ __forceinline__ void DecodeDigit(
-	K key, 
-	int &digit, 
-	int &flag_offset,		// in bytes
-	const int SET_OFFSET)
-{
-	const int PADDED_BYTES_PER_LANE 	= PADDED_PARTIALS_PER_LANE * 4;
-	const int SET_OFFSET_BYTES 		= SET_OFFSET * 4;
-	const K QUAD_MASK 							= (RADIX_DIGITS < 4) ? 0x1 : 0x3;
-	
-	digit = DecodeDigit<K, RADIX_DIGITS, BIT>(key);
-	int lane = digit >> 2;
-	int quad_byte = digit & QUAD_MASK;
-
-	flag_offset = SET_OFFSET_BYTES + FastMul(lane, PADDED_BYTES_PER_LANE) + quad_byte;
-}
-
-
-template <typename K, long long RADIX_DIGITS, int BIT, int SETS_PER_PASS, int SCAN_LANES_PER_SET, int PADDED_PARTIALS_PER_LANE>
-__device__ __forceinline__ void DecodeDigits(
-	typename VecType<K, 2>::Type keypairs[SETS_PER_PASS],
-	int2 digits[SETS_PER_PASS],
-	int2 flag_offsets[SETS_PER_PASS])		// in bytes 
-{
-
-	#pragma unroll
-	for (int SET = 0; SET < (int) SETS_PER_PASS; SET++) {
-		
-		const int SET_OFFSET = SET * SCAN_LANES_PER_SET * PADDED_PARTIALS_PER_LANE;
-
-		DecodeDigit<K, RADIX_DIGITS, BIT, PADDED_PARTIALS_PER_LANE>(
-				keypairs[SET].x, digits[SET].x, flag_offsets[SET].x, SET_OFFSET);
-		
-		DecodeDigit<K, RADIX_DIGITS, BIT, PADDED_PARTIALS_PER_LANE>(
-				keypairs[SET].y, digits[SET].y, flag_offsets[SET].y, SET_OFFSET);
-	}
-}
-
-
-template <typename T, typename PreprocessFunctor>
-__device__ __forceinline__ void GuardedReadSet(
-	T *in, 
-	typename VecType<T, 2>::Type &pair,
-	int offset,
-	int extra[1],
-	PreprocessFunctor preprocess = PreprocessFunctor())				
-{
-	if (offset - extra[0] < 0) {
-		pair.x = in[offset];
-		preprocess(pair.x);
-	} else {
-		pair.x = DefaultextraValue<T>();
-	}
-	
-	if (offset + 1 - extra[0] < 0) {
-		pair.y = in[offset + 1];
-		preprocess(pair.y);
-	} else {
-		pair.y = DefaultextraValue<T>();
-	}
-}
-
-
-template <typename T, bool UNGUARDED_IO, int SETS_PER_PASS, typename PreprocessFunctor>
-__device__ __forceinline__ void ReadSets(
-	typename VecType<T, 2>::Type *d_in, 
-	typename VecType<T, 2>::Type pairs[SETS_PER_PASS],
-	const int BASE2,
-	int extra[1],
-	PreprocessFunctor preprocess = PreprocessFunctor())				
-{
-	if (UNGUARDED_IO) {
-
-		// N.B. -- I wish we could do some pragma unrolling here too, but the compiler makes it 1% slower
-		if (SETS_PER_PASS > 0) pairs[0] = d_in[threadIdx.x + BASE2 + (B40C_RADIXSORT_THREADS * 0)];
-		if (SETS_PER_PASS > 1) pairs[1] = d_in[threadIdx.x + BASE2 + (B40C_RADIXSORT_THREADS * 1)];
-		if (SETS_PER_PASS > 2) pairs[2] = d_in[threadIdx.x + BASE2 + (B40C_RADIXSORT_THREADS * 2)];
-		if (SETS_PER_PASS > 3) pairs[3] = d_in[threadIdx.x + BASE2 + (B40C_RADIXSORT_THREADS * 3)];
-
-		#pragma unroll 
-		for (int SET = 0; SET < (int) SETS_PER_PASS; SET++) {
-			preprocess(pairs[SET].x);
-			preprocess(pairs[SET].y);
-		}
-		
-	} else {
-
-		T* in = (T*) d_in;
-		
-		// N.B. --  I wish we could do some pragma unrolling here, but the compiler won't let 
-		// us with user-defined value types (e.g., Fribbitz): "Advisory: Loop was not unrolled, cannot deduce loop trip count"
-		
-		if (SETS_PER_PASS > 0) GuardedReadSet<T, PreprocessFunctor>(in, pairs[0], (threadIdx.x << 1) + (BASE2 << 1) + (B40C_RADIXSORT_THREADS * 2 * 0), extra);
-		if (SETS_PER_PASS > 1) GuardedReadSet<T, PreprocessFunctor>(in, pairs[1], (threadIdx.x << 1) + (BASE2 << 1) + (B40C_RADIXSORT_THREADS * 2 * 1), extra);
-		if (SETS_PER_PASS > 2) GuardedReadSet<T, PreprocessFunctor>(in, pairs[2], (threadIdx.x << 1) + (BASE2 << 1) + (B40C_RADIXSORT_THREADS * 2 * 2), extra);
-		if (SETS_PER_PASS > 3) GuardedReadSet<T, PreprocessFunctor>(in, pairs[3], (threadIdx.x << 1) + (BASE2 << 1) + (B40C_RADIXSORT_THREADS * 2 * 3), extra);
-	}
-}
-
-
-template <int SETS_PER_PASS>
-__device__ __forceinline__ void PlacePartials(
-	unsigned char * base_partial,
-	int2 digits[SETS_PER_PASS],
-	int2 flag_offsets[SETS_PER_PASS]) 
-{
-	#pragma unroll
-	for (int SET = 0; SET < (int) SETS_PER_PASS; SET++) {
-		base_partial[flag_offsets[SET].x] = 1;
-		base_partial[flag_offsets[SET].y] = 1 + (digits[SET].x == digits[SET].y);
-	}
-}
-
-
-template <int SETS_PER_PASS>
-__device__ __forceinline__ void ExtractRanks(
-	unsigned char * base_partial,
-	int2 digits[SETS_PER_PASS],
-	int2 flag_offsets[SETS_PER_PASS],
-	int2 ranks[SETS_PER_PASS]) 
-{
-	#pragma unroll
-	for (int SET = 0; SET < (int) SETS_PER_PASS; SET++) {
-		ranks[SET].x = base_partial[flag_offsets[SET].x];
-		ranks[SET].y = base_partial[flag_offsets[SET].y] + (digits[SET].x == digits[SET].y);
-	}
-}
-
-
-template <int RADIX_DIGITS, int SETS_PER_PASS>
-__device__ __forceinline__ void UpdateRanks(
-	int2 digits[SETS_PER_PASS],
-	int2 ranks[SETS_PER_PASS],
-	int digit_counts[SETS_PER_PASS][RADIX_DIGITS])
-{
-	// N.B.: I wish we could pragma unroll here, but doing so currently 
-	// results in the 3.1 compilier on 64-bit platforms generating bad
-	// code for SM1.3, resulting in incorrect sorting (e.g., problem size 16)
-	
-	if (SETS_PER_PASS > 0) {
-		ranks[0].x += digit_counts[0][digits[0].x];
-		ranks[0].y += digit_counts[0][digits[0].y]; 
-	}	
-	if (SETS_PER_PASS > 1) {
-		ranks[1].x += digit_counts[1][digits[1].x];
-		ranks[1].y += digit_counts[1][digits[1].y]; 
-	}	
-	if (SETS_PER_PASS > 2) {
-		ranks[2].x += digit_counts[2][digits[2].x];
-		ranks[2].y += digit_counts[2][digits[2].y]; 
-	}	
-	if (SETS_PER_PASS > 3) {
-		ranks[3].x += digit_counts[3][digits[3].x];
-		ranks[3].y += digit_counts[3][digits[3].y]; 
-	}	
-}
-
-template <int RADIX_DIGITS, int PASSES_PER_CYCLE, int SETS_PER_PASS>
-__device__ __forceinline__ void UpdateRanks(
-	int2 digits[PASSES_PER_CYCLE][SETS_PER_PASS],
-	int2 ranks[PASSES_PER_CYCLE][SETS_PER_PASS],
-	int digit_counts[PASSES_PER_CYCLE][SETS_PER_PASS][RADIX_DIGITS])
-{
-	// N.B.: I wish we could pragma unroll here, but doing so currently 
-	// results in the 3.1 compilier on 64-bit platforms generating bad
-	// code for SM1.3, resulting in incorrect sorting (e.g., problem size 16)
-	
-	if (PASSES_PER_CYCLE > 0) UpdateRanks<RADIX_DIGITS, SETS_PER_PASS>(digits[0], ranks[0], digit_counts[0]);
-	if (PASSES_PER_CYCLE > 1) UpdateRanks<RADIX_DIGITS, SETS_PER_PASS>(digits[1], ranks[1], digit_counts[1]);
-	if (PASSES_PER_CYCLE > 2) UpdateRanks<RADIX_DIGITS, SETS_PER_PASS>(digits[2], ranks[2], digit_counts[2]);
-	if (PASSES_PER_CYCLE > 3) UpdateRanks<RADIX_DIGITS, SETS_PER_PASS>(digits[3], ranks[3], digit_counts[3]);
-}
-
-
-
-template <int SCAN_LANES_PER_PASS, int LOG_RAKING_THREADS_PER_LANE, int RAKING_THREADS_PER_LANE, int PARTIALS_PER_SEG>
-__device__ __forceinline__ void PrefixScanOverLanes(
-	int 	raking_segment[],
-	int 	warpscan[SCAN_LANES_PER_PASS][3][RAKING_THREADS_PER_LANE],
-	int 	copy_section)
-{
-	// Upsweep rake
-	int partial_reduction = SerialReduce<PARTIALS_PER_SEG>(raking_segment);
-
-	// Warpscan reduction in digit warpscan_lane
-	int warpscan_lane = threadIdx.x >> LOG_RAKING_THREADS_PER_LANE;
-	int group_prefix = WarpScan<RAKING_THREADS_PER_LANE, true>(
-		warpscan[warpscan_lane], 
-		partial_reduction,
-		copy_section);
-
-	// Downsweep rake
-	SerialScan<PARTIALS_PER_SEG>(raking_segment, group_prefix);
-	
-}
-
-
-template <int SCAN_LANES_PER_PASS, int RAKING_THREADS_PER_LANE, int SETS_PER_PASS, int SCAN_LANES_PER_SET>
-__device__ __forceinline__ void RecoverDigitCounts(
-	int warpscan[SCAN_LANES_PER_PASS][3][RAKING_THREADS_PER_LANE],
-	int counts[SETS_PER_PASS],
-	int copy_section)
-{
-	int my_lane = threadIdx.x >> 2;
-	int my_quad_byte = threadIdx.x & 3;
-	
-	#pragma unroll
-	for (int SET = 0; SET < (int) SETS_PER_PASS; SET++) {
-		unsigned char *warpscan_count = (unsigned char *) &warpscan[my_lane + (SCAN_LANES_PER_SET * SET)][1 + copy_section][RAKING_THREADS_PER_LANE - 1];
-		counts[SET] = warpscan_count[my_quad_byte];
-	}
-}
-
-template<int RADIX_DIGITS>
-__device__ __forceinline__ void CorrectUnguardedSetOverflow(
-	int2 			set_digits,
-	int 	&set_count)				
-{
-	if (WarpVoteAll(RADIX_DIGITS, set_count <= 1)) {
-		// All first-pass, first set keys have same digit. 
-		set_count = (threadIdx.x == set_digits.x) ? 256 : 0;
-	}
-}
-
-template <int RADIX_DIGITS, int SETS_PER_PASS>
-__device__ __forceinline__ void CorrectUnguardedPassOverflow(
-	int2 			pass_digits[SETS_PER_PASS],
-	int 	pass_counts[SETS_PER_PASS])				
-{
-	// N.B. -- I wish we could do some pragma unrolling here too, but the compiler won't comply, 
-	// telling me "Advisory: Loop was not unrolled, unexpected call OPs"
-
-	if (SETS_PER_PASS > 0) CorrectUnguardedSetOverflow<RADIX_DIGITS>(pass_digits[0], pass_counts[0]);
-	if (SETS_PER_PASS > 1) CorrectUnguardedSetOverflow<RADIX_DIGITS>(pass_digits[1], pass_counts[1]);
-	if (SETS_PER_PASS > 2) CorrectUnguardedSetOverflow<RADIX_DIGITS>(pass_digits[2], pass_counts[2]);
-	if (SETS_PER_PASS > 3) CorrectUnguardedSetOverflow<RADIX_DIGITS>(pass_digits[3], pass_counts[3]);
-}
-
-
-template <int RADIX_DIGITS, int PASSES_PER_CYCLE, int SETS_PER_PASS>
-__device__ __forceinline__ void CorrectUnguardedCycleOverflow(
-	int2 			cycle_digits[PASSES_PER_CYCLE][SETS_PER_PASS],
-	int 	cycle_counts[PASSES_PER_CYCLE][SETS_PER_PASS])
-{
-	// N.B. -- I wish we could do some pragma unrolling here too, but the compiler won't comply, 
-	// telling me "Advisory: Loop was not unrolled, unexpected call OPs"
-
-	if (PASSES_PER_CYCLE > 0) CorrectUnguardedPassOverflow<RADIX_DIGITS, SETS_PER_PASS>(cycle_digits[0], cycle_counts[0]);
-	if (PASSES_PER_CYCLE > 1) CorrectUnguardedPassOverflow<RADIX_DIGITS, SETS_PER_PASS>(cycle_digits[1], cycle_counts[1]);
-}
-
-
-template <int RADIX_DIGITS>
-__device__ __forceinline__ void CorrectLastLaneOverflow(int &count, int extra[1]) 
-{
-	if (WarpVoteAll(RADIX_DIGITS, count == 0) && (threadIdx.x == RADIX_DIGITS - 1)) {
-		// We're 'f' and we overflowed b/c of invalid 'f' placemarkers; the number of valid items in this set is the count of valid f's 
-		count = extra[0] & 255;
-	}
-}
-		
-
-template <int RADIX_DIGITS, int PASSES_PER_CYCLE, int SETS_PER_PASS, int SETS_PER_CYCLE, bool UNGUARDED_IO>
-__device__ __forceinline__ void CorrectForOverflows(
-	int2 digits[PASSES_PER_CYCLE][SETS_PER_PASS],
-	int counts[PASSES_PER_CYCLE][SETS_PER_PASS], 
-	int extra[1])				
-{
-	if (!UNGUARDED_IO) {
-
-		// Correct any overflow in the partially-filled last lane
-		int *linear_counts = (int *) counts;
-		CorrectLastLaneOverflow<RADIX_DIGITS>(linear_counts[SETS_PER_CYCLE - 1], extra);
-	}
-
-	CorrectUnguardedCycleOverflow<RADIX_DIGITS, PASSES_PER_CYCLE, SETS_PER_PASS>(digits, counts);
-}
-
-
-template <
-	typename K,
-	int BIT, 
-	int RADIX_DIGITS,
-	int SCAN_LANES_PER_SET,
-	int SETS_PER_PASS,
-	int RAKING_THREADS_PER_PASS,
-	int SCAN_LANES_PER_PASS,
-	int LOG_RAKING_THREADS_PER_LANE,
-	int RAKING_THREADS_PER_LANE,
-	int PARTIALS_PER_SEG,
-	int PADDED_PARTIALS_PER_LANE,
-	int PASSES_PER_CYCLE>
-__device__ __forceinline__ void ScanPass(
-	int *base_partial,
-	int	*raking_partial,
-	int warpscan[SCAN_LANES_PER_PASS][3][RAKING_THREADS_PER_LANE],
-	typename VecType<K, 2>::Type keypairs[SETS_PER_PASS],
-	int2 digits[SETS_PER_PASS],
-	int2 flag_offsets[SETS_PER_PASS],
-	int2 ranks[SETS_PER_PASS],
-	int copy_section)
-{
-	// Reset smem
-	#pragma unroll
-	for (int SCAN_LANE = 0; SCAN_LANE < (int) SCAN_LANES_PER_PASS; SCAN_LANE++) {
-		base_partial[SCAN_LANE * PADDED_PARTIALS_PER_LANE] = 0;
-	}
-	
-	// Decode digits for first pass
-	DecodeDigits<K, RADIX_DIGITS, BIT, SETS_PER_PASS, SCAN_LANES_PER_SET, PADDED_PARTIALS_PER_LANE>(
-		keypairs, digits, flag_offsets);
-	
-	// Encode counts into smem for first pass
-	PlacePartials<SETS_PER_PASS>(
-		(unsigned char *) base_partial,
-		digits,
-		flag_offsets); 
-	
-	__syncthreads();
-	
-	// Intra-group prefix scans for first pass
-	if (threadIdx.x < RAKING_THREADS_PER_PASS) {
-	
-		PrefixScanOverLanes<SCAN_LANES_PER_PASS, LOG_RAKING_THREADS_PER_LANE, RAKING_THREADS_PER_LANE, PARTIALS_PER_SEG>(		// first pass is offset right by one
-			raking_partial,
-			warpscan, 
-			copy_section);
-	}
-	
-	__syncthreads();
-
-	// Extract ranks
-	ExtractRanks<SETS_PER_PASS>(
-		(unsigned char *) base_partial, 
-		digits, 
-		flag_offsets, 
-		ranks); 	
-}	
-	
-
-/******************************************************************************
- * SM1.3 Local Exchange Routines
- * 
- * Routines for exchanging keys (and values) in shared memory (i.e., local 
- * scattering) in order to to facilitate coalesced global scattering
- ******************************************************************************/
-
-template <typename T, bool UNGUARDED_IO, int PASSES_PER_CYCLE, int SETS_PER_PASS, typename PostprocessFunctor>
-__device__ __forceinline__ void ScatterSets(
-	T *d_out, 
-	typename VecType<T, 2>::Type pairs[SETS_PER_PASS],
-	int2 offsets[SETS_PER_PASS],
-	const int BASE4,
-	int extra[1],
-	PostprocessFunctor postprocess = PostprocessFunctor())				
-{
-	#pragma unroll 
-	for (int SET = 0; SET < (int) SETS_PER_PASS; SET++) {
-		postprocess(pairs[SET].x);
-		postprocess(pairs[SET].y);
-	}
-
-	// N.B. -- I wish we could do some pragma unrolling here too, but the compiler makes it 1% slower 
-		
-	if (SETS_PER_PASS > 0) { 
-		if (UNGUARDED_IO || (threadIdx.x + BASE4 + (B40C_RADIXSORT_THREADS * 0) < extra[0])) 
-			d_out[offsets[0].x] = pairs[0].x;
-		if (UNGUARDED_IO || (threadIdx.x + BASE4 + (B40C_RADIXSORT_THREADS * 1) < extra[0])) 
-			d_out[offsets[0].y] = pairs[0].y;
-	}
-
-	if (SETS_PER_PASS > 1) { 
-		if (UNGUARDED_IO || (threadIdx.x + BASE4 + (B40C_RADIXSORT_THREADS * 2) < extra[0])) 
-			d_out[offsets[1].x] = pairs[1].x;
-		if (UNGUARDED_IO || (threadIdx.x + BASE4 + (B40C_RADIXSORT_THREADS * 3) < extra[0])) 
-			d_out[offsets[1].y] = pairs[1].y;
-	}
-
-	if (SETS_PER_PASS > 2) { 
-		if (UNGUARDED_IO || (threadIdx.x + BASE4 + (B40C_RADIXSORT_THREADS * 4) < extra[0])) 
-			d_out[offsets[2].x] = pairs[2].x;
-		if (UNGUARDED_IO || (threadIdx.x + BASE4 + (B40C_RADIXSORT_THREADS * 5) < extra[0])) 
-			d_out[offsets[2].y] = pairs[2].y;
-	}
-
-	if (SETS_PER_PASS > 3) { 
-		if (UNGUARDED_IO || (threadIdx.x + BASE4 + (B40C_RADIXSORT_THREADS * 6) < extra[0])) 
-			d_out[offsets[3].x] = pairs[3].x;
-		if (UNGUARDED_IO || (threadIdx.x + BASE4 + (B40C_RADIXSORT_THREADS * 7) < extra[0])) 
-			d_out[offsets[3].y] = pairs[3].y;
-	}
-}
-
-template <typename T, int PASSES_PER_CYCLE, int SETS_PER_PASS>
-__device__ __forceinline__ void PushPairs(
-	T *swap, 
-	typename VecType<T, 2>::Type pairs[PASSES_PER_CYCLE][SETS_PER_PASS],
-	int2 ranks[PASSES_PER_CYCLE][SETS_PER_PASS])				
-{
-	#pragma unroll 
-	for (int PASS = 0; PASS < (int) PASSES_PER_CYCLE; PASS++) {
-	
-		#pragma unroll 
-		for (int SET = 0; SET < (int) SETS_PER_PASS; SET++) {
-			swap[ranks[PASS][SET].x] = pairs[PASS][SET].x;
-			swap[ranks[PASS][SET].y] = pairs[PASS][SET].y;
-		}
-	}
-}
-	
-template <typename T, int PASSES_PER_CYCLE, int SETS_PER_PASS>
-__device__ __forceinline__ void ExchangePairs(
-	T *swap, 
-	typename VecType<T, 2>::Type pairs[PASSES_PER_CYCLE][SETS_PER_PASS],
-	int2 ranks[PASSES_PER_CYCLE][SETS_PER_PASS])				
-{
-	// Push in Pairs
-	PushPairs<T, PASSES_PER_CYCLE, SETS_PER_PASS>(swap, pairs, ranks);
-	
-	__syncthreads();
-	
-	// Extract pairs
-	#pragma unroll 
-	for (int PASS = 0; PASS < (int) PASSES_PER_CYCLE; PASS++) {
-		
-		#pragma unroll 
-		for (int SET = 0; SET < (int) SETS_PER_PASS; SET++) {
-			const int BLOCK = ((PASS * SETS_PER_PASS) + SET) * 2;
-			pairs[PASS][SET].x = swap[threadIdx.x + (B40C_RADIXSORT_THREADS * (BLOCK + 0))];
-			pairs[PASS][SET].y = swap[threadIdx.x + (B40C_RADIXSORT_THREADS * (BLOCK + 1))];
-		}
-	}
-}
-
-
-template <
-	typename K,
-	typename V,	
-	int RADIX_DIGITS, 
-	int BIT, 
-	int PASSES_PER_CYCLE,
-	int SETS_PER_PASS,
-	bool UNGUARDED_IO,
-	typename PostprocessFunctor>
-__device__ __forceinline__ void SwapAndScatterSm13(
-	typename VecType<K, 2>::Type keypairs[PASSES_PER_CYCLE][SETS_PER_PASS], 
-	int2 ranks[PASSES_PER_CYCLE][SETS_PER_PASS],
-	int4 *exchange,
-	typename VecType<V, 2>::Type *d_in_values, 
-	K *d_out_keys, 
-	V *d_out_values, 
-	int carry[RADIX_DIGITS], 
-	int extra[1])				
-{
-	int2 offsets[PASSES_PER_CYCLE][SETS_PER_PASS];
-	
-	// Swap keys according to ranks
-	ExchangePairs<K, PASSES_PER_CYCLE, SETS_PER_PASS>((K*) exchange, keypairs, ranks);				
-	
-	// Calculate scatter offsets (re-decode digits from keys: it's less work than making a second exchange of digits) 
-	#pragma unroll 
-	for (int PASS = 0; PASS < (int) PASSES_PER_CYCLE; PASS++) {
-		
-		#pragma unroll 
-		for (int SET = 0; SET < (int) SETS_PER_PASS; SET++) {
-			const int BLOCK = ((PASS * SETS_PER_PASS) + SET) * 2;
-			offsets[PASS][SET].x = threadIdx.x + (B40C_RADIXSORT_THREADS * (BLOCK + 0)) + carry[DecodeDigit<K, RADIX_DIGITS, BIT>(keypairs[PASS][SET].x)];
-			offsets[PASS][SET].y = threadIdx.x + (B40C_RADIXSORT_THREADS * (BLOCK + 1)) + carry[DecodeDigit<K, RADIX_DIGITS, BIT>(keypairs[PASS][SET].y)];
-		}
-	}
-	
-	// Scatter keys
-	#pragma unroll 
-	for (int PASS = 0; PASS < (int) PASSES_PER_CYCLE; PASS++) {
-		const int BLOCK = PASS * SETS_PER_PASS * 2;
-		ScatterSets<K, UNGUARDED_IO, PASSES_PER_CYCLE, SETS_PER_PASS, PostprocessFunctor>(d_out_keys, keypairs[PASS], offsets[PASS], B40C_RADIXSORT_THREADS * BLOCK, extra);
-	}
-
-	if (!IsKeysOnly<V>()) {
-	
-		__syncthreads();
-
-		// Read input data
-		typename VecType<V, 2>::Type datapairs[PASSES_PER_CYCLE][SETS_PER_PASS];
-
-		// N.B. -- I wish we could do some pragma unrolling here too, but the compiler won't comply, 
-		// telling me "Advisory: Loop was not unrolled, unexpected control flow"
-
-		if (PASSES_PER_CYCLE > 0) ReadSets<V, UNGUARDED_IO, SETS_PER_PASS, NopFunctor<V> >(d_in_values, datapairs[0], B40C_RADIXSORT_THREADS * SETS_PER_PASS * 0, extra);
-		if (PASSES_PER_CYCLE > 1) ReadSets<V, UNGUARDED_IO, SETS_PER_PASS, NopFunctor<V> >(d_in_values, datapairs[1], B40C_RADIXSORT_THREADS * SETS_PER_PASS * 1, extra);
-		
-		// Swap data according to ranks
-		ExchangePairs<V, PASSES_PER_CYCLE, SETS_PER_PASS>((V*) exchange, datapairs, ranks);
-		
-		// Scatter data
-		#pragma unroll 
-		for (int PASS = 0; PASS < (int) PASSES_PER_CYCLE; PASS++) {
-			const int BLOCK = PASS * SETS_PER_PASS * 2;
-			ScatterSets<V, UNGUARDED_IO, PASSES_PER_CYCLE, SETS_PER_PASS, NopFunctor<V> >(d_out_values, datapairs[PASS], offsets[PASS], B40C_RADIXSORT_THREADS * BLOCK, extra);
-		}
-	}
-}
-
-
-/******************************************************************************
- * SM1.0 Local Exchange Routines
- *
- * Routines for exchanging keys (and values) in shared memory (i.e., local 
- * scattering) in order to to facilitate coalesced global scattering
- ******************************************************************************/
-
-template <
-	typename T, 
-	int RADIX_DIGITS,
-	bool UNGUARDED_IO,
-	typename PostprocessFunctor> 
-__device__ __forceinline__ void ScatterPass(
-	T *swapmem,
-	T *d_out, 
-	int digit_scan[2][RADIX_DIGITS], 
-	int carry[RADIX_DIGITS], 
-	int extra[1],
-	int base_digit,				
-	PostprocessFunctor postprocess = PostprocessFunctor())				
-{
-	const int LOG_STORE_TXN_THREADS = B40C_LOG_MEM_BANKS(__CUDA_ARCH__);
-	const int STORE_TXN_THREADS = 1 << LOG_STORE_TXN_THREADS;
-	
-	int store_txn_idx = threadIdx.x & (STORE_TXN_THREADS - 1);
-	int store_txn_digit = threadIdx.x >> LOG_STORE_TXN_THREADS;
-	
-	int my_digit = base_digit + store_txn_digit;
-	if (my_digit < RADIX_DIGITS) {
-	
-		int my_exclusive_scan = digit_scan[1][my_digit - 1];
-		int my_inclusive_scan = digit_scan[1][my_digit];
-		int my_digit_count = my_inclusive_scan - my_exclusive_scan;
-
-		int my_carry = carry[my_digit] + my_exclusive_scan;
-		int my_aligned_offset = store_txn_idx - (my_carry & (STORE_TXN_THREADS - 1));
-		
-		while (my_aligned_offset < my_digit_count) {
-
-			if ((my_aligned_offset >= 0) && (UNGUARDED_IO || (my_exclusive_scan + my_aligned_offset < extra[0]))) { 
-			
-				T datum = swapmem[my_exclusive_scan + my_aligned_offset];
-				postprocess(datum);
-				d_out[my_carry + my_aligned_offset] = datum;
-			}
-			my_aligned_offset += STORE_TXN_THREADS;
-		}
-	}
-}
-
-template <
-	typename T,
-	int RADIX_DIGITS, 
-	int PASSES_PER_CYCLE,
-	int SETS_PER_PASS,
-	bool UNGUARDED_IO,
-	typename PostprocessFunctor>
-__device__ __forceinline__ void SwapAndScatterPairs(
-	typename VecType<T, 2>::Type pairs[PASSES_PER_CYCLE][SETS_PER_PASS], 
-	int2 ranks[PASSES_PER_CYCLE][SETS_PER_PASS],
-	T *exchange,
-	T *d_out, 
-	int carry[RADIX_DIGITS], 
-	int digit_scan[2][RADIX_DIGITS], 
-	int extra[1])				
-{
-	const int SCATTER_PASS_DIGITS = B40C_RADIXSORT_WARPS * (B40C_WARP_THREADS / B40C_MEM_BANKS(__CUDA_ARCH__));
-	const int SCATTER_PASSES = RADIX_DIGITS / SCATTER_PASS_DIGITS;
-
-	// Push in pairs
-	PushPairs<T, PASSES_PER_CYCLE, SETS_PER_PASS>(exchange, pairs, ranks);
-
-	__syncthreads();
-
-	// N.B. -- I wish we could do some pragma unrolling here too, but the compiler won't comply, 
-	// telling me "Advisory: Loop was not unrolled, not an innermost loop"
-
-	if (SCATTER_PASSES > 0) ScatterPass<T, RADIX_DIGITS, UNGUARDED_IO, PostprocessFunctor>(exchange, d_out, digit_scan, carry, extra, SCATTER_PASS_DIGITS * 0);
-	if (SCATTER_PASSES > 1) ScatterPass<T, RADIX_DIGITS, UNGUARDED_IO, PostprocessFunctor>(exchange, d_out, digit_scan, carry, extra, SCATTER_PASS_DIGITS * 1);
-	if (SCATTER_PASSES > 2) ScatterPass<T, RADIX_DIGITS, UNGUARDED_IO, PostprocessFunctor>(exchange, d_out, digit_scan, carry, extra, SCATTER_PASS_DIGITS * 2);
-	if (SCATTER_PASSES > 3) ScatterPass<T, RADIX_DIGITS, UNGUARDED_IO, PostprocessFunctor>(exchange, d_out, digit_scan, carry, extra, SCATTER_PASS_DIGITS * 3);
-	if (SCATTER_PASSES > 4) ScatterPass<T, RADIX_DIGITS, UNGUARDED_IO, PostprocessFunctor>(exchange, d_out, digit_scan, carry, extra, SCATTER_PASS_DIGITS * 4);
-	if (SCATTER_PASSES > 5) ScatterPass<T, RADIX_DIGITS, UNGUARDED_IO, PostprocessFunctor>(exchange, d_out, digit_scan, carry, extra, SCATTER_PASS_DIGITS * 5);
-	if (SCATTER_PASSES > 6) ScatterPass<T, RADIX_DIGITS, UNGUARDED_IO, PostprocessFunctor>(exchange, d_out, digit_scan, carry, extra, SCATTER_PASS_DIGITS * 6);
-	if (SCATTER_PASSES > 7) ScatterPass<T, RADIX_DIGITS, UNGUARDED_IO, PostprocessFunctor>(exchange, d_out, digit_scan, carry, extra, SCATTER_PASS_DIGITS * 7);
-}
-
-
-template <
-	typename K,
-	typename V,	
-	int RADIX_DIGITS, 
-	int PASSES_PER_CYCLE,
-	int SETS_PER_PASS,
-	bool UNGUARDED_IO,
-	typename PostprocessFunctor>
-__device__ __forceinline__ void SwapAndScatterSm10(
-	typename VecType<K, 2>::Type keypairs[PASSES_PER_CYCLE][SETS_PER_PASS], 
-	int2 ranks[PASSES_PER_CYCLE][SETS_PER_PASS],
-	int4 *exchange,
-	typename VecType<V, 2>::Type *d_in_values, 
-	K *d_out_keys, 
-	V *d_out_values, 
-	int carry[RADIX_DIGITS], 
-	int digit_scan[2][RADIX_DIGITS], 
-	int extra[1])				
-{
-	// Swap and scatter keys
-	SwapAndScatterPairs<K, RADIX_DIGITS, PASSES_PER_CYCLE, SETS_PER_PASS, UNGUARDED_IO, PostprocessFunctor>(
-		keypairs, ranks, (K*) exchange, d_out_keys, carry, digit_scan, extra);				
-	
-	if (!IsKeysOnly<V>()) {
-
-		__syncthreads();
-		
-		// N.B. -- I wish we could do some pragma unrolling here too, but the compiler won't comply, 
-		// telling me "Advisory: Loop was not unrolled, unexpected control flow"
-
-		// Read input data
-		typename VecType<V, 2>::Type datapairs[PASSES_PER_CYCLE][SETS_PER_PASS];
-		if (PASSES_PER_CYCLE > 0) ReadSets<V, UNGUARDED_IO, SETS_PER_PASS, NopFunctor<V> >(d_in_values, datapairs[0], B40C_RADIXSORT_THREADS * SETS_PER_PASS * 0, extra);
-		if (PASSES_PER_CYCLE > 1) ReadSets<V, UNGUARDED_IO, SETS_PER_PASS, NopFunctor<V> >(d_in_values, datapairs[1], B40C_RADIXSORT_THREADS * SETS_PER_PASS * 1, extra);
-
-		// Swap and scatter data
-		SwapAndScatterPairs<V, RADIX_DIGITS, PASSES_PER_CYCLE, SETS_PER_PASS, UNGUARDED_IO, NopFunctor<V> >(
-			datapairs, ranks, (V*) exchange, d_out_values, carry, digit_scan, extra);				
-	}
-}
-
-
-/******************************************************************************
- * Cycle of RADIXSORT_CYCLE_ELEMENTS keys (and values)
- ******************************************************************************/
-
-template <
-	typename K,
-	typename V,	
-	int BIT, 
-	bool UNGUARDED_IO,
-	int RADIX_DIGITS,
-	int LOG_SCAN_LANES_PER_SET,
-	int SCAN_LANES_PER_SET,
-	int SETS_PER_PASS,
-	int PASSES_PER_CYCLE,
-	int LOG_SCAN_LANES_PER_PASS,
-	int SCAN_LANES_PER_PASS,
-	int LOG_PARTIALS_PER_LANE,
-	int LOG_PARTIALS_PER_PASS,
-	int LOG_RAKING_THREADS_PER_PASS,
-	int RAKING_THREADS_PER_PASS,
-	int LOG_RAKING_THREADS_PER_LANE,
-	int RAKING_THREADS_PER_LANE,
-	int LOG_PARTIALS_PER_SEG,
-	int PARTIALS_PER_SEG,
-	int LOG_PARTIALS_PER_ROW,
-	int PARTIALS_PER_ROW,
-	int LOG_SEGS_PER_ROW,	
-	int SEGS_PER_ROW,
-	int LOG_ROWS_PER_SET,
-	int LOG_ROWS_PER_LANE,
-	int ROWS_PER_LANE,
-	int LOG_ROWS_PER_PASS,
-	int ROWS_PER_PASS,
-	int MAX_EXCHANGE_BYTES,
-	typename PreprocessFunctor,
-	typename PostprocessFunctor>
-
-__device__ __forceinline__ void SrtsScanDigitCycle(
-	typename VecType<K, 2>::Type *d_in_keys, 
-	typename VecType<V, 2>::Type *d_in_values, 
-	K *d_out_keys, 
-	V *d_out_values, 
-	int4 *exchange,								
-	int	warpscan[SCAN_LANES_PER_PASS][3][RAKING_THREADS_PER_LANE],
-	int	carry[RADIX_DIGITS],
-	int	digit_scan[2][RADIX_DIGITS],						 
-	int	digit_counts[PASSES_PER_CYCLE][SETS_PER_PASS][RADIX_DIGITS],
-	int	extra[1],
-	int	*base_partial,
-	int	*raking_partial)		
-{
-	
-	const int PADDED_PARTIALS_PER_LANE 		= ROWS_PER_LANE * (PARTIALS_PER_ROW + 1);	 
-	const int SETS_PER_CYCLE 				= PASSES_PER_CYCLE * SETS_PER_PASS;
-
-	// N.B.: We use the following voodoo incantations to elide the compiler's miserable 
-	// "declared but never referenced" warnings for these (which are actually used for 
-	// template instantiation)	
-	SuppressUnusedConstantWarning(PADDED_PARTIALS_PER_LANE);
-	SuppressUnusedConstantWarning(SETS_PER_CYCLE);
-	
-	typename VecType<K, 2>::Type 	keypairs[PASSES_PER_CYCLE][SETS_PER_PASS];
-	int2 							digits[PASSES_PER_CYCLE][SETS_PER_PASS];
-	int2 							flag_offsets[PASSES_PER_CYCLE][SETS_PER_PASS];		// a byte offset
-	int2 							ranks[PASSES_PER_CYCLE][SETS_PER_PASS];
-
-	
-	//-------------------------------------------------------------------------
-	// Read keys
-	//-------------------------------------------------------------------------
-
-	// N.B. -- I wish we could do some pragma unrolling here too, but the compiler won't comply, 
-	// telling me "Advisory: Loop was not unrolled, unexpected control flow construct"
-	
-	// Read Keys
-	if (PASSES_PER_CYCLE > 0) ReadSets<K, UNGUARDED_IO, SETS_PER_PASS, PreprocessFunctor>(d_in_keys, keypairs[0], B40C_RADIXSORT_THREADS * SETS_PER_PASS * 0, extra);		 
-	if (PASSES_PER_CYCLE > 1) ReadSets<K, UNGUARDED_IO, SETS_PER_PASS, PreprocessFunctor>(d_in_keys, keypairs[1], B40C_RADIXSORT_THREADS * SETS_PER_PASS * 1, extra); 	
-	
-	//-------------------------------------------------------------------------
-	// Lane-scanning Passes
-	//-------------------------------------------------------------------------
-
-	#pragma unroll
-	for (int PASS = 0; PASS < (int) PASSES_PER_CYCLE; PASS++) {
-	
-		// First Pass
-		ScanPass<K, BIT, RADIX_DIGITS, SCAN_LANES_PER_SET, SETS_PER_PASS, RAKING_THREADS_PER_PASS, SCAN_LANES_PER_PASS, LOG_RAKING_THREADS_PER_LANE, RAKING_THREADS_PER_LANE, PARTIALS_PER_SEG, PADDED_PARTIALS_PER_LANE, PASSES_PER_CYCLE>(
-			base_partial,
-			raking_partial,
-			warpscan,
-			keypairs[PASS],
-			digits[PASS],
-			flag_offsets[PASS],
-			ranks[PASS],
-			PASSES_PER_CYCLE - PASS - 1);		// lower passes get copied right
-	}
-	
-	//-------------------------------------------------------------------------
-	// Digit-scanning 
-	//-------------------------------------------------------------------------
-
-	// Recover second-half digit-counts, scan across all digit-counts
-	if (threadIdx.x < RADIX_DIGITS) {
-
-		int counts[PASSES_PER_CYCLE][SETS_PER_PASS];
-
-		// Recover digit-counts
-
-		#pragma unroll
-		for (int PASS = 0; PASS < (int) PASSES_PER_CYCLE; PASS++) {
-			RecoverDigitCounts<SCAN_LANES_PER_PASS, RAKING_THREADS_PER_LANE, SETS_PER_PASS, SCAN_LANES_PER_SET>(		// first pass, offset by 1			
-				warpscan, 
-				counts[PASS],
-				PASSES_PER_CYCLE - PASS - 1);		// lower passes get copied right
-		}
-		
-		// Check for overflows
-		CorrectForOverflows<RADIX_DIGITS, PASSES_PER_CYCLE, SETS_PER_PASS, SETS_PER_CYCLE, UNGUARDED_IO>(
-				digits, counts, extra);
-
-		// Scan across my digit counts for each set 
-		int exclusive_total = 0;
-		int inclusive_total = 0;
-		
-		#pragma unroll
-		for (int PASS = 0; PASS < (int) PASSES_PER_CYCLE; PASS++) {
-		
-			#pragma unroll
-			for (int SET = 0; SET < (int) SETS_PER_PASS; SET++) {
-				inclusive_total += counts[PASS][SET];
-				counts[PASS][SET] = exclusive_total;
-				exclusive_total = inclusive_total;
-			}
-		}
-
-		// second half of carry update
-		int my_carry = carry[threadIdx.x] + digit_scan[1][threadIdx.x];
-
-		// Perform overflow-free SIMD Kogge-Stone across digits
-		int digit_prefix = WarpScan<RADIX_DIGITS, false>(
-				digit_scan, 
-				inclusive_total,
-				0);
-
-		// first-half of carry update 
-		carry[threadIdx.x] = my_carry - digit_prefix;
-		
-		#pragma unroll
-		for (int PASS = 0; PASS < (int) PASSES_PER_CYCLE; PASS++) {
-
-			#pragma unroll
-			for (int SET = 0; SET < (int) SETS_PER_PASS; SET++) {
-				digit_counts[PASS][SET][threadIdx.x] = counts[PASS][SET] + digit_prefix;
-			}
-		}
-	}
-	
-	__syncthreads();
-
-	//-------------------------------------------------------------------------
-	// Update Ranks
-	//-------------------------------------------------------------------------
-
-	UpdateRanks<RADIX_DIGITS, PASSES_PER_CYCLE, SETS_PER_PASS>(digits, ranks, digit_counts);
-	
-	
-	//-------------------------------------------------------------------------
-	// Scatter 
-	//-------------------------------------------------------------------------
-
-#if ((__CUDA_ARCH__ < 130) || FERMI_ECC)		
-
-	SwapAndScatterSm10<K, V, RADIX_DIGITS, PASSES_PER_CYCLE, SETS_PER_PASS, UNGUARDED_IO, PostprocessFunctor>(
-		keypairs, 
-		ranks,
-		exchange,
-		d_in_values, 
-		d_out_keys, 
-		d_out_values, 
-		carry, 
-		digit_scan,
-		extra);
-	
-#else 
-
-	SwapAndScatterSm13<K, V, RADIX_DIGITS, BIT, PASSES_PER_CYCLE, SETS_PER_PASS, UNGUARDED_IO, PostprocessFunctor>(
-		keypairs, 
-		ranks,
-		exchange,
-		d_in_values, 
-		d_out_keys, 
-		d_out_values, 
-		carry, 
-		extra);
-	
-#endif
-
-	__syncthreads();
-
-}
-
-
-
-/******************************************************************************
- * Scan/Scatter Kernel Entry Point
- ******************************************************************************/
-
-template <
-	typename K, 
-	typename V, 
-	int PASS, 
-	int RADIX_BITS, 
-	int BIT, 
-	typename PreprocessFunctor, 
-	typename PostprocessFunctor>
-__launch_bounds__ (B40C_RADIXSORT_THREADS, B40C_RADIXSORT_SCAN_SCATTER_CTA_OCCUPANCY(__CUDA_ARCH__))
-__global__ 
-void ScanScatterDigits(
-	bool *d_from_alt_storage,
-	int* d_spine,
-	K* d_in_keys,
-	K* d_out_keys,
-	V* d_in_values,
-	V* d_out_values,
-	CtaDecomposition work_decomposition)
-{
-
-	const int RADIX_DIGITS 				= 1 << RADIX_BITS;
-	
-	const int LOG_SCAN_LANES_PER_SET	= (RADIX_BITS > 2) ? RADIX_BITS - 2 : 0;					// Always at one lane per set
-	const int SCAN_LANES_PER_SET		= 1 << LOG_SCAN_LANES_PER_SET;								// N.B.: we have "declared but never referenced" warnings for these, but they're actually used for template instantiation
-	
-	const int LOG_SETS_PER_PASS			= B40C_RADIXSORT_LOG_SETS_PER_PASS(__CUDA_ARCH__);			
-	const int SETS_PER_PASS				= 1 << LOG_SETS_PER_PASS;
-	
-	const int LOG_PASSES_PER_CYCLE		= B40C_RADIXSORT_LOG_PASSES_PER_CYCLE(__CUDA_ARCH__, K, V);			
-	const int PASSES_PER_CYCLE			= 1 << LOG_PASSES_PER_CYCLE;
-
-	const int LOG_SCAN_LANES_PER_PASS	= LOG_SETS_PER_PASS + LOG_SCAN_LANES_PER_SET;
-	const int SCAN_LANES_PER_PASS		= 1 << LOG_SCAN_LANES_PER_PASS;
-	
-	const int LOG_PARTIALS_PER_LANE 	= B40C_RADIXSORT_LOG_THREADS;
-	
-	const int LOG_PARTIALS_PER_PASS		= LOG_SCAN_LANES_PER_PASS + LOG_PARTIALS_PER_LANE;
-
-	const int LOG_RAKING_THREADS_PER_PASS 		= B40C_RADIXSORT_LOG_RAKING_THREADS_PER_PASS(__CUDA_ARCH__);
-	const int RAKING_THREADS_PER_PASS			= 1 << LOG_RAKING_THREADS_PER_PASS;
-
-	const int LOG_RAKING_THREADS_PER_LANE 		= LOG_RAKING_THREADS_PER_PASS - LOG_SCAN_LANES_PER_PASS;
-	const int RAKING_THREADS_PER_LANE 			= 1 << LOG_RAKING_THREADS_PER_LANE;
-
-	const int LOG_PARTIALS_PER_SEG 		= LOG_PARTIALS_PER_LANE - LOG_RAKING_THREADS_PER_LANE;
-	const int PARTIALS_PER_SEG 			= 1 << LOG_PARTIALS_PER_SEG;
-
-	const int LOG_PARTIALS_PER_ROW		= (LOG_PARTIALS_PER_SEG < B40C_LOG_MEM_BANKS(__CUDA_ARCH__)) ? B40C_LOG_MEM_BANKS(__CUDA_ARCH__) : LOG_PARTIALS_PER_SEG;		// floor of MEM_BANKS partials per row
-	const int PARTIALS_PER_ROW			= 1 << LOG_PARTIALS_PER_ROW;
-	const int PADDED_PARTIALS_PER_ROW 	= PARTIALS_PER_ROW + 1;
-
-	const int LOG_SEGS_PER_ROW 			= LOG_PARTIALS_PER_ROW - LOG_PARTIALS_PER_SEG;	
-	const int SEGS_PER_ROW				= 1 << LOG_SEGS_PER_ROW;
-
-	const int LOG_ROWS_PER_SET 			= LOG_PARTIALS_PER_PASS - LOG_PARTIALS_PER_ROW;
-
-	const int LOG_ROWS_PER_LANE 		= LOG_PARTIALS_PER_LANE - LOG_PARTIALS_PER_ROW;
-	const int ROWS_PER_LANE 			= 1 << LOG_ROWS_PER_LANE;
-
-	const int LOG_ROWS_PER_PASS 		= LOG_SCAN_LANES_PER_PASS + LOG_ROWS_PER_LANE;
-	const int ROWS_PER_PASS 			= 1 << LOG_ROWS_PER_PASS;
-	
-	const int SCAN_LANE_BYTES			= ROWS_PER_PASS * PADDED_PARTIALS_PER_ROW * sizeof(int);
-	const int MAX_EXCHANGE_BYTES		= (sizeof(K) > sizeof(V)) ? 
-													B40C_RADIXSORT_CYCLE_ELEMENTS(__CUDA_ARCH__, K, V) * sizeof(K) : 
-													B40C_RADIXSORT_CYCLE_ELEMENTS(__CUDA_ARCH__, K, V) * sizeof(V);
-	const int SCAN_LANE_INT4S         = (B40C_MAX(MAX_EXCHANGE_BYTES, SCAN_LANE_BYTES) + sizeof(int4) - 1) / sizeof(int4);
-
-
-	// N.B.: We use the following voodoo incantations to elide the compiler's miserable 
-	// "declared but never referenced" warnings for these (which are actually used for 
-	// template instantiation)	
-	SuppressUnusedConstantWarning(SCAN_LANES_PER_SET);
-	SuppressUnusedConstantWarning(PARTIALS_PER_SEG);
-	SuppressUnusedConstantWarning(LOG_ROWS_PER_SET);
-	SuppressUnusedConstantWarning(ROWS_PER_LANE);
-
-    // scan_lanes is a int4[] to avoid alignment issues when casting to (K *) and/or (V *)
-	__shared__ int4		scan_lanes[SCAN_LANE_INT4S];
-	__shared__ int 		warpscan[SCAN_LANES_PER_PASS][3][RAKING_THREADS_PER_LANE];		// One warpscan per fours-group
-	__shared__ int 		carry[RADIX_DIGITS];
-	__shared__ int 		digit_scan[2][RADIX_DIGITS];						 
-	__shared__ int 		digit_counts[PASSES_PER_CYCLE][SETS_PER_PASS][RADIX_DIGITS];
-	__shared__ bool 	non_trivial_digit_pass;
-	__shared__ bool		from_alt_storage;
-	
-	_B40C_REG_MISER_QUALIFIER_ int extra[1];
-	_B40C_REG_MISER_QUALIFIER_ int oob[1];
-
-	extra[0] = (blockIdx.x == gridDim.x - 1) ? work_decomposition.extra_elements_last_block : 0;
-
-	// calculate our threadblock's range
-	int block_elements, block_offset;
-	if (blockIdx.x < work_decomposition.num_big_blocks) {
-		block_offset = work_decomposition.big_block_elements * blockIdx.x;
-		block_elements = work_decomposition.big_block_elements;
-	} else {
-		block_offset = (work_decomposition.normal_block_elements * blockIdx.x) + (work_decomposition.num_big_blocks * B40C_RADIXSORT_CYCLE_ELEMENTS(__CUDA_ARCH__, K, V));
-		block_elements = work_decomposition.normal_block_elements;
-	}
-	oob[0] = block_offset + block_elements;	// out-of-bounds
-
-	
-	// location for placing 2-element partial reductions in the first lane of a pass	
-	int row = threadIdx.x >> LOG_PARTIALS_PER_ROW; 
-	int col = threadIdx.x & (PARTIALS_PER_ROW - 1); 
-	int *base_partial = reinterpret_cast<int *>(scan_lanes) + (row * PADDED_PARTIALS_PER_ROW) + col; 								
-	
-	// location for raking across all sets within a pass
-	int *raking_partial = 0;										
-
-	if (threadIdx.x < RAKING_THREADS_PER_PASS) {
-
-		// initalize lane warpscans
-		if (threadIdx.x < RAKING_THREADS_PER_LANE) {
-			
-			#pragma unroll
-			for (int SCAN_LANE = 0; SCAN_LANE < (int) SCAN_LANES_PER_PASS; SCAN_LANE++) {
-				warpscan[SCAN_LANE][0][threadIdx.x] = 0;
-			}
-		}
-
-		// initialize digit warpscans
-		if (threadIdx.x < RADIX_DIGITS) {
-
-			// Initialize digit_scan
-			digit_scan[0][threadIdx.x] = 0;
-			digit_scan[1][threadIdx.x] = 0;
-
-			// Determine where to read our input
-			from_alt_storage = (PASS == 0) ? false : d_from_alt_storage[PASS & 0x1];
-
-			// Read carry in parallel 
-			int spine_digit_offset = FastMul(gridDim.x, threadIdx.x);
-			int my_digit_carry = d_spine[spine_digit_offset + blockIdx.x];
-			carry[threadIdx.x] = my_digit_carry;
-
-			// Determine whether or not we have work to do and setup the next round 
-			// accordingly.  Everybody but the first threadblock can determine this 
-			// from the number of non-zero-and-non-oob digit carries.  First block 
-			// needs someone else's because he always writes the zero offset.
-			
-			int predicate;
-			if (PreprocessFunctor::MustApply() || PostprocessFunctor::MustApply()) {
-
-				non_trivial_digit_pass = true;
-
-			} else {
-
-				if (blockIdx.x > 0) {
-					// Non-first CTA : use digit-carry from first block
-					my_digit_carry = d_spine[spine_digit_offset];
-				}
-				
-				predicate = ((my_digit_carry > 0) && (my_digit_carry < work_decomposition.num_elements));
-				non_trivial_digit_pass = (TallyWarpVote(RADIX_DIGITS, predicate, reinterpret_cast<int *>(scan_lanes)) > 0);
-			}
-
-			// Let the next round know which set of buffers to use
-			if (blockIdx.x == 0) d_from_alt_storage[(PASS + 1) & 0x1] = from_alt_storage ^ non_trivial_digit_pass;
-		}
-
-		// initialize raking segment
-		row = threadIdx.x >> LOG_SEGS_PER_ROW;
-		col = (threadIdx.x & (SEGS_PER_ROW - 1)) << LOG_PARTIALS_PER_SEG;
-		raking_partial = reinterpret_cast<int *>(scan_lanes) + (row * PADDED_PARTIALS_PER_ROW) + col; 
-	}
-
-	// Sync to acquire non_trivial_digit_pass and from_temp_storage
-	__syncthreads();
-	
-	// Short-circuit this entire pass
-	if (!non_trivial_digit_pass) return; 
-
-	if (!from_alt_storage) {
-	
-		// Scan in tiles of cycle_elements
-		while (block_offset < oob[0]) {
-	
-			SrtsScanDigitCycle<K, V, BIT, true, RADIX_DIGITS, LOG_SCAN_LANES_PER_SET, SCAN_LANES_PER_SET, SETS_PER_PASS, PASSES_PER_CYCLE, LOG_SCAN_LANES_PER_PASS, SCAN_LANES_PER_PASS, LOG_PARTIALS_PER_LANE, LOG_PARTIALS_PER_PASS, LOG_RAKING_THREADS_PER_PASS, RAKING_THREADS_PER_PASS, LOG_RAKING_THREADS_PER_LANE, RAKING_THREADS_PER_LANE, LOG_PARTIALS_PER_SEG, PARTIALS_PER_SEG, LOG_PARTIALS_PER_ROW, PARTIALS_PER_ROW, LOG_SEGS_PER_ROW, SEGS_PER_ROW, LOG_ROWS_PER_SET, LOG_ROWS_PER_LANE, ROWS_PER_LANE, LOG_ROWS_PER_PASS, ROWS_PER_PASS, MAX_EXCHANGE_BYTES, PreprocessFunctor, PostprocessFunctor>(	
-				reinterpret_cast<typename VecType<K, 2>::Type *>((void *) &d_in_keys[block_offset]), 
-				reinterpret_cast<typename VecType<V, 2>::Type *>((void *) &d_in_values[block_offset]), 
-				d_out_keys, 
-				d_out_values, 
-				scan_lanes,
-				warpscan,
-				carry,
-				digit_scan,						 
-				digit_counts,
-				extra,
-				base_partial,
-				raking_partial);		
-	
-			block_offset += B40C_RADIXSORT_CYCLE_ELEMENTS(__CUDA_ARCH__, K, V);
-		}
-	
-		if (extra[0]) {
-			
-			SrtsScanDigitCycle<K, V, BIT, false, RADIX_DIGITS, LOG_SCAN_LANES_PER_SET, SCAN_LANES_PER_SET, SETS_PER_PASS, PASSES_PER_CYCLE, LOG_SCAN_LANES_PER_PASS, SCAN_LANES_PER_PASS, LOG_PARTIALS_PER_LANE, LOG_PARTIALS_PER_PASS, LOG_RAKING_THREADS_PER_PASS, RAKING_THREADS_PER_PASS, LOG_RAKING_THREADS_PER_LANE, RAKING_THREADS_PER_LANE, LOG_PARTIALS_PER_SEG, PARTIALS_PER_SEG, LOG_PARTIALS_PER_ROW, PARTIALS_PER_ROW, LOG_SEGS_PER_ROW, SEGS_PER_ROW, LOG_ROWS_PER_SET, LOG_ROWS_PER_LANE, ROWS_PER_LANE, LOG_ROWS_PER_PASS, ROWS_PER_PASS, MAX_EXCHANGE_BYTES, PreprocessFunctor, PostprocessFunctor>(	
-				reinterpret_cast<typename VecType<K, 2>::Type *>((void *) &d_in_keys[block_offset]), 
-				reinterpret_cast<typename VecType<V, 2>::Type *>((void *) &d_in_values[block_offset]), 
-				d_out_keys, 
-				d_out_values, 
-				scan_lanes,
-				warpscan,
-				carry,
-				digit_scan,						 
-				digit_counts,
-				extra,
-				base_partial,
-				raking_partial);		
-		}
-
-	} else {
-		
-		// Scan in tiles of cycle_elements
-		while (block_offset < oob[0]) {
-
-			SrtsScanDigitCycle<K, V, BIT, true, RADIX_DIGITS, LOG_SCAN_LANES_PER_SET, SCAN_LANES_PER_SET, SETS_PER_PASS, PASSES_PER_CYCLE, LOG_SCAN_LANES_PER_PASS, SCAN_LANES_PER_PASS, LOG_PARTIALS_PER_LANE, LOG_PARTIALS_PER_PASS, LOG_RAKING_THREADS_PER_PASS, RAKING_THREADS_PER_PASS, LOG_RAKING_THREADS_PER_LANE, RAKING_THREADS_PER_LANE, LOG_PARTIALS_PER_SEG, PARTIALS_PER_SEG, LOG_PARTIALS_PER_ROW, PARTIALS_PER_ROW, LOG_SEGS_PER_ROW, SEGS_PER_ROW, LOG_ROWS_PER_SET, LOG_ROWS_PER_LANE, ROWS_PER_LANE, LOG_ROWS_PER_PASS, ROWS_PER_PASS, MAX_EXCHANGE_BYTES, PreprocessFunctor, PostprocessFunctor>(	
-				reinterpret_cast<typename VecType<K, 2>::Type *>((void *) &d_out_keys[block_offset]), 
-				reinterpret_cast<typename VecType<V, 2>::Type *>((void *) &d_out_values[block_offset]), 
-				d_in_keys, 
-				d_in_values, 
-				scan_lanes,
-				warpscan,
-				carry,
-				digit_scan,						 
-				digit_counts,
-				extra,
-				base_partial,
-				raking_partial);		
-
-			block_offset += B40C_RADIXSORT_CYCLE_ELEMENTS(__CUDA_ARCH__, K, V);
-		}
-
-		if (extra[0]) {
-			
-			SrtsScanDigitCycle<K, V, BIT, false, RADIX_DIGITS, LOG_SCAN_LANES_PER_SET, SCAN_LANES_PER_SET, SETS_PER_PASS, PASSES_PER_CYCLE, LOG_SCAN_LANES_PER_PASS, SCAN_LANES_PER_PASS, LOG_PARTIALS_PER_LANE, LOG_PARTIALS_PER_PASS, LOG_RAKING_THREADS_PER_PASS, RAKING_THREADS_PER_PASS, LOG_RAKING_THREADS_PER_LANE, RAKING_THREADS_PER_LANE, LOG_PARTIALS_PER_SEG, PARTIALS_PER_SEG, LOG_PARTIALS_PER_ROW, PARTIALS_PER_ROW, LOG_SEGS_PER_ROW, SEGS_PER_ROW, LOG_ROWS_PER_SET, LOG_ROWS_PER_LANE, ROWS_PER_LANE, LOG_ROWS_PER_PASS, ROWS_PER_PASS, MAX_EXCHANGE_BYTES, PreprocessFunctor, PostprocessFunctor>(	
-				reinterpret_cast<typename VecType<K, 2>::Type *>((void *) &d_out_keys[block_offset]), 
-				reinterpret_cast<typename VecType<V, 2>::Type *>((void *) &d_out_values[block_offset]), 
-				d_in_keys, 
-				d_in_values, 
-				scan_lanes,
-				warpscan,
-				carry,
-				digit_scan,						 
-				digit_counts,
-				extra,
-				base_partial,
-				raking_partial);		
-		}
-		
-	}
-}
-
-} // end namespace b40c_thrust
-} // end namespace detail
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cuda/detail/detail/b40c/radixsort_spine_kernel.h b/compat/thrust/system/cuda/detail/detail/b40c/radixsort_spine_kernel.h
deleted file mode 100644
index 3d20f4aa79..0000000000
--- a/compat/thrust/system/cuda/detail/detail/b40c/radixsort_spine_kernel.h
+++ /dev/null
@@ -1,187 +0,0 @@
-/******************************************************************************
- * 
- * Copyright 2010 Duane Merrill
- * 
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * 
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License. 
- * 
- * 
- * 
- * 
- * AUTHORS' REQUEST: 
- * 
- * 		If you use|reference|benchmark this code, please cite our Technical 
- * 		Report (http://www.cs.virginia.edu/~dgm4d/papers/RadixSortTR.pdf):
- * 
- *		@TechReport{ Merrill:Sorting:2010,
- *        	author = "Duane Merrill and Andrew Grimshaw",
- *        	title = "Revisiting Sorting for GPGPU Stream Architectures",
- *        	year = "2010",
- *        	institution = "University of Virginia, Department of Computer Science",
- *        	address = "Charlottesville, VA, USA",
- *        	number = "CS2010-03"
- *		}
- * 
- * For more information, see our Google Code project site: 
- * http://code.google.com/p/back40computing/
- * 
- * Thanks!
- * 
- ******************************************************************************/
-
-
-/******************************************************************************
- * Top-level histogram/spine scanning kernel
- ******************************************************************************/
-
-#pragma once
-
-#include "radixsort_kernel_common.h"
-
-namespace thrust  {
-namespace system  {
-namespace cuda    {
-namespace detail  {
-namespace detail  {
-namespace b40c_thrust   {
-
-/******************************************************************************
- * Scans a cycle of RADIXSORT_CYCLE_ELEMENTS elements 
- ******************************************************************************/
-
-template<int PARTIALS_PER_SEG>
-__device__ __forceinline__ void SrtsScanCycle(
-	int *smem_offset,
-	int *smem_segment,
-	int warpscan[2][B40C_WARP_THREADS],
-	int4 *in, 
-	int4 *out,
-	int &carry)
-{
-	int4 datum; 
-
-	// read input data
-	datum = in[threadIdx.x];
-
-	smem_offset[0] = datum.x + datum.y + datum.z + datum.w;
-
-	__syncthreads();
-
-	if (threadIdx.x < B40C_WARP_THREADS) {
-
-		int partial_reduction = SerialReduce<PARTIALS_PER_SEG>(smem_segment);
-
-		int seed = WarpScan<B40C_WARP_THREADS, false>(warpscan, partial_reduction, 0);
-		seed += carry;		
-		
-		SerialScan<PARTIALS_PER_SEG>(smem_segment, seed);
-
-		carry += warpscan[1][B40C_WARP_THREADS - 1];	
-	}
-
-	__syncthreads();
-
-	int part0 = smem_offset[0];
-	int part1;
-
-	part1 = datum.x + part0;
-	datum.x = part0;
-	part0 = part1 + datum.y;
-	datum.y = part1;
-
-	part1 = datum.z + part0;
-	datum.z = part0;
-	part0 = part1 + datum.w;
-	datum.w = part1;
-	
-	out[threadIdx.x] = datum;
-}
-
-
-/******************************************************************************
- * Spine/histogram Scan Kernel Entry Point
- ******************************************************************************/
-
-template <typename T>
-__global__ void SrtsScanSpine(
-	int *d_ispine,
-	int *d_ospine,
-	int normal_block_elements)
-{
-	const int LOG_PARTIALS				= B40C_RADIXSORT_LOG_THREADS;				
-	const int PARTIALS			 		= 1 << LOG_PARTIALS;
-	
-	const int LOG_PARTIALS_PER_SEG 		= LOG_PARTIALS - B40C_LOG_WARP_THREADS;
-	const int PARTIALS_PER_SEG 			= 1 << LOG_PARTIALS_PER_SEG;
-
-	const int LOG_PARTIALS_PER_ROW		= (LOG_PARTIALS_PER_SEG < B40C_LOG_MEM_BANKS(__CUDA_ARCH__)) ? B40C_LOG_MEM_BANKS(__CUDA_ARCH__) : LOG_PARTIALS_PER_SEG;		// floor of 32 elts per row
-	const int PARTIALS_PER_ROW			= 1 << LOG_PARTIALS_PER_ROW;
-	
-	const int LOG_SEGS_PER_ROW 			= LOG_PARTIALS_PER_ROW - LOG_PARTIALS_PER_SEG;	
-	const int SEGS_PER_ROW				= 1 << LOG_SEGS_PER_ROW;
-
-	const int SMEM_ROWS 				= PARTIALS / PARTIALS_PER_ROW;
-	
-	__shared__ int smem[SMEM_ROWS][PARTIALS_PER_ROW + 1];
-	__shared__ int warpscan[2][B40C_WARP_THREADS];
-
-  // WAR spurious unused constant warning
-  SuppressUnusedConstantWarning(PARTIALS_PER_SEG);
-
-	int *smem_segment = 0;
-	int carry = 0;
-
-	int row = threadIdx.x >> LOG_PARTIALS_PER_ROW;		
-	int col = threadIdx.x & (PARTIALS_PER_ROW - 1);			
-	int *smem_offset = &smem[row][col];
-
-	if (blockIdx.x > 0) {
-		return;
-	}
-	
-	if (threadIdx.x < B40C_WARP_THREADS) {
-		
-		// two segs per row, odd segs are offset by 8
-		row = threadIdx.x >> LOG_SEGS_PER_ROW;
-		col = (threadIdx.x & (SEGS_PER_ROW - 1)) << LOG_PARTIALS_PER_SEG;
-		smem_segment = &smem[row][col];
-	
-		if (threadIdx.x < B40C_WARP_THREADS) {
-			carry = 0;
-			warpscan[0][threadIdx.x] = 0;
-		}
-	}
-
-	// scan the spine in blocks of cycle_elements
-	int block_offset = 0;
-	while (block_offset < normal_block_elements) {
-		
-		SrtsScanCycle<PARTIALS_PER_SEG>(	
-			smem_offset, 
-			smem_segment, 
-			warpscan,
-			reinterpret_cast<int4 *>((void *) &d_ispine[block_offset]), 
-			reinterpret_cast<int4 *>((void *) &d_ospine[block_offset]), 
-			carry);
-
-		block_offset += B40C_RADIXSORT_SPINE_CYCLE_ELEMENTS;
-	}
-} 
-
-
-} // end namespace b40c_thrust
-} // end namespace detail
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cuda/detail/detail/b40c/vector_types.h b/compat/thrust/system/cuda/detail/detail/b40c/vector_types.h
deleted file mode 100644
index 6db7931078..0000000000
--- a/compat/thrust/system/cuda/detail/detail/b40c/vector_types.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/**
- * Copyright 2010 Duane Merrill
- * 
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * 
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License. 
- * 
- * For more information, see our Google Code project site: 
- * http://code.google.com/p/back40computing/
- * 
- * Thanks!
- */
-
-#pragma once
-
-#include <vector_types.h>
-
-namespace thrust  {
-namespace system  {
-namespace cuda    {
-namespace detail  {
-namespace detail  {
-namespace b40c_thrust   {
-
-//------------------------------------------------------------------------------
-// Vector types
-//------------------------------------------------------------------------------
-
-template <typename K, int vec_elements> struct VecType;
-
-
-//
-// Define general vector types
-//
-
-template <typename K> 
-struct VecType<K, 1> {
-	K x;
-	typedef K Type;
-};
-
-template <typename K> 
-struct VecType<K, 2> {
-	K x;
-	K y;
-	typedef VecType<K, 2> Type;
-};
-
-template <typename K> 
-struct VecType<K, 4> {
-	K x;
-	K y;
-	K z;
-	K w;
-	typedef VecType<K, 4> Type;
-};
-
-//
-// Specialize certain built-in vector types
-//
-
-#define B40C_DEFINE_VECTOR_TYPE(base_type,short_type)                           \
-  template<> struct VecType<base_type, 1> { typedef short_type##1 Type; };      \
-  template<> struct VecType<base_type, 2> { typedef short_type##2 Type; };      \
-  template<> struct VecType<base_type, 4> { typedef short_type##4 Type; };     
-
-B40C_DEFINE_VECTOR_TYPE(char,               char)
-B40C_DEFINE_VECTOR_TYPE(short,              short)
-B40C_DEFINE_VECTOR_TYPE(int,                int)
-B40C_DEFINE_VECTOR_TYPE(long,               long)
-B40C_DEFINE_VECTOR_TYPE(long long,          longlong)
-B40C_DEFINE_VECTOR_TYPE(unsigned char,      uchar)
-B40C_DEFINE_VECTOR_TYPE(unsigned short,     ushort)
-B40C_DEFINE_VECTOR_TYPE(unsigned int,       uint)
-B40C_DEFINE_VECTOR_TYPE(unsigned long,      ulong)
-B40C_DEFINE_VECTOR_TYPE(unsigned long long, ulonglong)
-B40C_DEFINE_VECTOR_TYPE(float,              float)
-B40C_DEFINE_VECTOR_TYPE(double,             double)
-
-#undef B40C_DEFINE_VECTOR_TYPE
-
-} // end namespace b40c_thrust
-} // end namespace detail
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cuda/detail/detail/balanced_path.h b/compat/thrust/system/cuda/detail/detail/balanced_path.h
deleted file mode 100644
index 51e4f5b767..0000000000
--- a/compat/thrust/system/cuda/detail/detail/balanced_path.h
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/pair.h>
-#include <thrust/detail/minmax.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-namespace detail
-{
-namespace balanced_path_detail
-{
-
-template<bool UpperBound, typename IntT, typename It, typename T, typename Comp>
-__host__ __device__ void BinarySearchIteration(It data, int& begin, int& end,
-	T key, int shift, Comp comp) {
-
-	IntT scale = (1<< shift) - 1;
-	int mid = (int)((begin + scale * end)>> shift);
-
-	T key2 = data[mid];
-	bool pred = UpperBound ? !comp(key, key2) : comp(key2, key);
-	if(pred) begin = (int)mid + 1;
-	else end = mid;
-}
-
-template<bool UpperBound, typename T, typename It, typename Comp>
-__host__ __device__ int BinarySearch(It data, int count, T key, Comp comp) {
-	int begin = 0;
-	int end = count;
-	while(begin < end) 
-		BinarySearchIteration<UpperBound, int>(data, begin, end, key, 1, comp);
-	return begin;
-}
-
-template<bool UpperBound, typename IntT, typename T, typename It, typename Comp>
-__host__ __device__ int BiasedBinarySearch(It data, int count, T key, 
-	IntT levels, Comp comp) {
-	int begin = 0;
-	int end = count;
-
-	if(levels >= 4 && begin < end)
-		BinarySearchIteration<UpperBound, IntT>(data, begin, end, key, 9, comp);
-	if(levels >= 3 && begin < end)
-		BinarySearchIteration<UpperBound, IntT>(data, begin, end, key, 7, comp);
-	if(levels >= 2 && begin < end)
-		BinarySearchIteration<UpperBound, IntT>(data, begin, end, key, 5, comp);
-	if(levels >= 1 && begin < end)
-		BinarySearchIteration<UpperBound, IntT>(data, begin, end, key, 4, comp);
-
-	while(begin < end)
-		BinarySearchIteration<UpperBound, IntT>(data, begin, end, key, 1, comp);
-	return begin;
-}
-
-template<bool UpperBound, typename It1, typename It2, typename Comp>
-__host__ __device__ int MergePath(It1 a, int aCount, It2 b, int bCount, int diag, Comp comp)
-{
-  typedef typename thrust::iterator_traits<It1>::value_type T;
-  
-  int begin = thrust::max(0, diag - bCount);
-  int end   = thrust::min(diag, aCount);
-  
-  while(begin < end) 
-  {
-    int mid = (begin + end)>> 1;
-    T aKey = a[mid];
-    T bKey = b[diag - 1 - mid];
-    bool pred = UpperBound ? comp(aKey, bKey) : !comp(bKey, aKey);
-    if(pred) begin = mid + 1;
-    else end = mid;
-  }
-  return begin;
-}
-
-
-} // end namespace balanced_path_detail
-
-
-template<typename RandomAccessIterator1, typename Size1, typename RandomAccessIterator2, typename Size2, typename Compare>
-__host__ __device__
-thrust::pair<Size1,Size1>
-  balanced_path(RandomAccessIterator1 first1, Size1 n1,
-                RandomAccessIterator2 first2, Size1 n2,
-                Size1 diag,
-                Size2 levels,
-                Compare comp)
-{
-  typedef typename thrust::iterator_traits<RandomAccessIterator1>::value_type T;
-
-  Size1 aIndex = balanced_path_detail::MergePath<false>(first1, n1, first2, n2, diag, comp);
-  Size1 bIndex = diag - aIndex;
-  
-  bool star = false;
-  if(bIndex < n2)
-  {
-    T x = first2[bIndex];
-    
-    // Search for the beginning of the duplicate run in both A and B.
-    Size1 aStart = balanced_path_detail::BiasedBinarySearch<false>(first1, aIndex, x, levels, comp);
-    Size1 bStart = balanced_path_detail::BiasedBinarySearch<false>(first2, bIndex, x, levels, comp);
-    
-    // The distance between x's merge path and its lower_bound is its rank.
-    // We add up the a and b ranks and evenly distribute them to
-    // get a stairstep path.
-    Size1 aRun = aIndex - aStart;
-    Size1 bRun = bIndex - bStart;
-    Size1 xCount = aRun + bRun;
-    
-    // Attempt to advance b and regress a.
-    Size1 bAdvance = thrust::max(xCount >> 1, xCount - aRun);
-    Size1 bEnd     = thrust::min<Size1>(n2, bStart + bAdvance + 1);
-    Size1 bRunEnd  = balanced_path_detail::BinarySearch<true>(first2 + bIndex, bEnd - bIndex, x, comp) + bIndex;
-    bRun = bRunEnd - bStart;
-    
-    bAdvance = thrust::min(bAdvance, bRun);
-    Size1 aAdvance = xCount - bAdvance;
-    
-    bool roundUp = (aAdvance == bAdvance + 1) && (bAdvance < bRun);
-    aIndex = aStart + aAdvance;
-    
-    if(roundUp) star = true;
-  }
-
-  return thrust::make_pair(aIndex, (diag - aIndex) + star);
-}
-
-
-} // end namespace detail
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cuda/detail/detail/cached_temporary_allocator.h b/compat/thrust/system/cuda/detail/detail/cached_temporary_allocator.h
deleted file mode 100644
index 2bbd658456..0000000000
--- a/compat/thrust/system/cuda/detail/detail/cached_temporary_allocator.h
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/raw_pointer_cast.h>
-#include <thrust/detail/allocator/allocator_traits.h>
-#include <thrust/detail/allocator/temporary_allocator.h>
-#include <thrust/pair.h>
-#include <map>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-namespace detail
-{
-
-
-template<typename DerivedPolicy, template<typename> class BasePolicy>
-  class cached_temporary_allocator
-    : public BasePolicy<cached_temporary_allocator<DerivedPolicy,BasePolicy> >
-{
-  private:
-    typedef thrust::detail::temporary_allocator<char,DerivedPolicy> base_allocator_type;
-    typedef thrust::detail::allocator_traits<base_allocator_type>   traits;
-    typedef typename traits::pointer                                  allocator_pointer;
-    typedef std::multimap<std::ptrdiff_t, void*>                      free_blocks_type;
-    typedef std::map<void *, std::ptrdiff_t>                          allocated_blocks_type;
-
-    base_allocator_type   m_base_allocator;
-    free_blocks_type      free_blocks;
-    allocated_blocks_type allocated_blocks;
-
-    void free_all()
-    {
-      // deallocate all outstanding blocks in both lists
-      for(free_blocks_type::iterator i = free_blocks.begin();
-          i != free_blocks.end();
-          ++i)
-      {
-        // transform the pointer to allocator_pointer before calling deallocate
-        traits::deallocate(m_base_allocator, allocator_pointer(reinterpret_cast<char*>(i->second)), i->first);
-      }
-
-      for(allocated_blocks_type::iterator i = allocated_blocks.begin();
-          i != allocated_blocks.end();
-          ++i)
-      {
-        // transform the pointer to allocator_pointer before calling deallocate
-        traits::deallocate(m_base_allocator, allocator_pointer(reinterpret_cast<char*>(i->first)), i->second);
-      }
-    }
-
-  public:
-    cached_temporary_allocator(thrust::execution_policy<DerivedPolicy> &system)
-      : m_base_allocator(system)
-    {}
-
-    ~cached_temporary_allocator()
-    {
-      // free all allocations when cached_allocator goes out of scope
-      free_all();
-    }
-
-    void *allocate(std::ptrdiff_t num_bytes)
-    {
-      void *result = 0;
-
-      // search the cache for a free block
-      free_blocks_type::iterator free_block = free_blocks.find(num_bytes);
-
-      if(free_block != free_blocks.end())
-      {
-        // get the pointer
-        result = free_block->second;
-
-        // erase from the free_blocks map
-        free_blocks.erase(free_block);
-      }
-      else
-      {
-        // no allocation of the right size exists
-        // create a new one with m_base_allocator
-        // allocate memory and convert to raw pointer
-        result = thrust::raw_pointer_cast(traits::allocate(m_base_allocator, num_bytes));
-      }
-
-      // insert the allocated pointer into the allocated_blocks map
-      allocated_blocks.insert(std::make_pair(result, num_bytes));
-
-      return result;
-    }
-
-    void deallocate(void *ptr)
-    {
-      // erase the allocated block from the allocated blocks map
-      allocated_blocks_type::iterator iter = allocated_blocks.find(ptr);
-      std::ptrdiff_t num_bytes = iter->second;
-      allocated_blocks.erase(iter);
-
-      // insert the block into the free blocks map
-      free_blocks.insert(std::make_pair(num_bytes, ptr));
-    }
-};
-
-
-// overload get_temporary_buffer on cached_temporary_allocator
-// note that we take a reference to cached_temporary_allocator
-template<typename T, typename DerivedPolicy, template<typename> class BasePolicy>
-  thrust::pair<T*, std::ptrdiff_t>
-    get_temporary_buffer(cached_temporary_allocator<DerivedPolicy,BasePolicy> &alloc, std::ptrdiff_t n)
-{
-  // ask the allocator for sizeof(T) * n bytes
-  T* result = reinterpret_cast<T*>(alloc.allocate(sizeof(T) * n));
-
-  // return the pointer and the number of elements allocated
-  return thrust::make_pair(result,n);
-}
-
-
-// overload return_temporary_buffer on cached_temporary_allocator
-// an overloaded return_temporary_buffer should always accompany
-// an overloaded get_temporary_buffer
-template<typename Pointer, typename DerivedPolicy, template<typename> class BasePolicy>
-  void return_temporary_buffer(cached_temporary_allocator<DerivedPolicy,BasePolicy> &alloc, Pointer p)
-{
-  // return the pointer to the allocator
-  alloc.deallocate(thrust::raw_pointer_cast(p));
-}
-
-
-} // end detail
-} // end detail
-} // end cuda
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/cuda/detail/detail/fast_scan.h b/compat/thrust/system/cuda/detail/detail/fast_scan.h
deleted file mode 100644
index d095a4a2db..0000000000
--- a/compat/thrust/system/cuda/detail/detail/fast_scan.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file fast_scan.h
- *  \brief A fast scan for primitive types.
- */
-
-#pragma once
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-namespace detail
-{
-namespace fast_scan
-{
-
-template <typename ExecutionPolicy,
-          typename InputIterator,
-          typename OutputIterator,
-          typename BinaryFunction>
-OutputIterator inclusive_scan(execution_policy<ExecutionPolicy> &exec,
-                              InputIterator first,
-                              InputIterator last,
-                              OutputIterator output,
-                              BinaryFunction binary_op);
-
-template <typename ExecutionPolicy,
-          typename InputIterator,
-          typename OutputIterator,
-          typename T,
-          typename BinaryFunction>
-OutputIterator exclusive_scan(execution_policy<ExecutionPolicy> &exec,
-                              InputIterator first,
-                              InputIterator last,
-                              OutputIterator output,
-                              const T init,
-                              BinaryFunction binary_op);
-
-} // end namespace fast_scan
-} // end namespace detail
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
-#include "fast_scan.inl"
-
diff --git a/compat/thrust/system/cuda/detail/detail/fast_scan.inl b/compat/thrust/system/cuda/detail/detail/fast_scan.inl
deleted file mode 100644
index b02763d8a9..0000000000
--- a/compat/thrust/system/cuda/detail/detail/fast_scan.inl
+++ /dev/null
@@ -1,753 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#include <thrust/detail/config.h>
-
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/system/detail/generic/select_system.h>
-#include <thrust/detail/temporary_array.h>
-
-#include <thrust/detail/type_traits.h>
-#include <thrust/detail/type_traits/function_traits.h>
-#include <thrust/detail/type_traits/iterator/is_output_iterator.h>
-
-#include <thrust/system/cuda/detail/reduce_intervals.h>
-#include <thrust/system/cuda/detail/synchronize.h>
-#include <thrust/system/cuda/detail/default_decomposition.h>
-#include <thrust/system/cuda/detail/detail/launch_closure.h>
-#include <thrust/system/cuda/detail/detail/uninitialized.h>
-#include <thrust/detail/raw_pointer_cast.h>
-
-__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_BEGIN
-
-
-namespace thrust
-{
-namespace detail
-{
-
-// forward declaration of temporary_array
-template<typename,typename> class temporary_array;
-
-} // end detail
-
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-namespace detail
-{
-namespace fast_scan
-{
-namespace fast_scan_detail
-{
-
-
-// TODO tune this
-template <typename ValueType>
-struct inclusive_scan_block_size
-{
-  private:
-  static const unsigned int max_memory         = 16384 - 256 - 2 * sizeof(ValueType);
-  static const unsigned int max_block_size     = max_memory / sizeof(ValueType);
-  static const unsigned int default_block_size = 7 * 32;
-  static const unsigned int block_size         = (max_block_size < default_block_size) ? max_block_size : default_block_size;
-
-  public:
-  static const unsigned int pass1 = block_size;
-  static const unsigned int pass2 = block_size;
-  static const unsigned int pass3 = block_size;
-};
-
-// TODO tune this
-template <typename ValueType>
-struct exclusive_scan_block_size
-{
-  private:
-  static const unsigned int max_memory         = 16384 - 256 - 2 * sizeof(ValueType);
-  static const unsigned int max_block_size     = max_memory / sizeof(ValueType);
-  static const unsigned int default_block_size = 5 * 32;
-  static const unsigned int block_size         = (max_block_size < default_block_size) ? max_block_size : default_block_size;
-
-  public:
-  static const unsigned int pass1 = block_size;
-  static const unsigned int pass2 = block_size;
-  static const unsigned int pass3 = block_size;
-};
-
-
-template <unsigned int CTA_SIZE,
-          typename Context,
-          typename SharedArray,
-          typename BinaryFunction>
-__device__ __thrust_forceinline__
-void scan_block(Context context, SharedArray array, BinaryFunction binary_op)
-{
-    typedef typename thrust::iterator_value<SharedArray>::type T;
-
-    T val = array[context.thread_index()];
-
-    if (CTA_SIZE >    1) { if(context.thread_index() >=    1) { T tmp = array[context.thread_index() -    1]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); }
-    if (CTA_SIZE >    2) { if(context.thread_index() >=    2) { T tmp = array[context.thread_index() -    2]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); }
-    if (CTA_SIZE >    4) { if(context.thread_index() >=    4) { T tmp = array[context.thread_index() -    4]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); }
-    if (CTA_SIZE >    8) { if(context.thread_index() >=    8) { T tmp = array[context.thread_index() -    8]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); }
-    if (CTA_SIZE >   16) { if(context.thread_index() >=   16) { T tmp = array[context.thread_index() -   16]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); }
-    if (CTA_SIZE >   32) { if(context.thread_index() >=   32) { T tmp = array[context.thread_index() -   32]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); }
-    if (CTA_SIZE >   64) { if(context.thread_index() >=   64) { T tmp = array[context.thread_index() -   64]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); }
-    if (CTA_SIZE >  128) { if(context.thread_index() >=  128) { T tmp = array[context.thread_index() -  128]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); }
-    if (CTA_SIZE >  256) { if(context.thread_index() >=  256) { T tmp = array[context.thread_index() -  256]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); }  
-    if (CTA_SIZE >  512) { if(context.thread_index() >=  512) { T tmp = array[context.thread_index() -  512]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); }  
-    if (CTA_SIZE > 1024) { if(context.thread_index() >= 1024) { T tmp = array[context.thread_index() - 1024]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); }  
-}
-
-template <unsigned int CTA_SIZE,
-          typename Context,
-          typename SharedArray,
-          typename BinaryFunction>
-__device__ __thrust_forceinline__
-void scan_block_n(Context context, SharedArray array, const unsigned int n, BinaryFunction binary_op)
-{
-    typedef typename thrust::iterator_value<SharedArray>::type T;
-
-    T val = array[context.thread_index()];
-
-    if (CTA_SIZE >    1) { if(context.thread_index() < n && context.thread_index() >=    1) { T tmp = array[context.thread_index() -    1]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); }
-    if (CTA_SIZE >    2) { if(context.thread_index() < n && context.thread_index() >=    2) { T tmp = array[context.thread_index() -    2]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); }
-    if (CTA_SIZE >    4) { if(context.thread_index() < n && context.thread_index() >=    4) { T tmp = array[context.thread_index() -    4]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); }
-    if (CTA_SIZE >    8) { if(context.thread_index() < n && context.thread_index() >=    8) { T tmp = array[context.thread_index() -    8]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); }
-    if (CTA_SIZE >   16) { if(context.thread_index() < n && context.thread_index() >=   16) { T tmp = array[context.thread_index() -   16]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); }
-    if (CTA_SIZE >   32) { if(context.thread_index() < n && context.thread_index() >=   32) { T tmp = array[context.thread_index() -   32]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); }
-    if (CTA_SIZE >   64) { if(context.thread_index() < n && context.thread_index() >=   64) { T tmp = array[context.thread_index() -   64]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); }
-    if (CTA_SIZE >  128) { if(context.thread_index() < n && context.thread_index() >=  128) { T tmp = array[context.thread_index() -  128]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); }
-    if (CTA_SIZE >  256) { if(context.thread_index() < n && context.thread_index() >=  256) { T tmp = array[context.thread_index() -  256]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); }
-    if (CTA_SIZE >  512) { if(context.thread_index() < n && context.thread_index() >=  512) { T tmp = array[context.thread_index() -  512]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); }
-    if (CTA_SIZE > 1024) { if(context.thread_index() < n && context.thread_index() >= 1024) { T tmp = array[context.thread_index() - 1024]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); }
-}
-
-template <unsigned int CTA_SIZE,
-          unsigned int K,
-          bool FullBlock,
-          typename Context,
-          typename InputIterator,
-          typename ValueType>
-__device__ __thrust_forceinline__
-void load_block(Context context,
-                const unsigned int n,
-                InputIterator input,
-                ValueType (&sdata)[K][CTA_SIZE + 1])
-{
-  for(unsigned int k = 0; k < K; k++)
-  {
-    const unsigned int offset = k*CTA_SIZE + context.thread_index();
-
-    if (FullBlock || offset < n)
-    {
-      InputIterator temp = input + offset;
-      sdata[offset % K][offset / K] = *temp;
-    }
-  }
-
-  context.barrier();
-}
-
-template <unsigned int CTA_SIZE,
-          unsigned int K,
-          bool Inclusive,
-          bool FullBlock,
-          typename Context,
-          typename OutputIterator,
-          typename ValueType>
-__device__ __thrust_forceinline__
-void store_block(Context context,
-                 const unsigned int n,
-                 OutputIterator output,
-                 ValueType (&sdata)[K][CTA_SIZE + 1],
-                 ValueType& carry)
-{
-  if (Inclusive)
-  {
-    for(unsigned int k = 0; k < K; k++)
-    {
-      const unsigned int offset = k*CTA_SIZE + context.thread_index();
-
-      if (FullBlock || offset < n)
-      {
-        OutputIterator temp = output + offset;
-        *temp = sdata[offset % K][offset / K];
-      }
-    }   
-  }
-  else
-  {
-    for(unsigned int k = 0; k < K; k++)
-    {
-      const unsigned int offset = k*CTA_SIZE + context.thread_index();
-
-      if (FullBlock || offset < n)
-      {
-        OutputIterator temp = output + offset;
-        *temp = (offset == 0) ? carry : sdata[(offset - 1) % K][(offset - 1) / K];
-      }
-    }   
-  }
-}
-
-template <unsigned int CTA_SIZE,
-          unsigned int K,
-          bool FullBlock,
-          typename Context,
-          typename InputIterator,
-          typename BinaryFunction,
-          typename ValueType>
-__device__ __thrust_forceinline__
-void upsweep_body(Context context,
-                  const unsigned int n,
-                  const bool carry_in,
-                  InputIterator input,
-                  BinaryFunction binary_op,
-                  ValueType (&sdata)[K][CTA_SIZE + 1],
-                  ValueType& carry)
-{
-  // read data
-  load_block<CTA_SIZE,K,FullBlock>(context, n, input, sdata);
- 
-  // copy into local array
-  ValueType ldata[K];
-  for (unsigned int k = 0; k < K; k++)
-    ldata[k] = sdata[k][context.thread_index()];
-
-  // carry in
-  if (context.thread_index() == 0 && carry_in)
-  {
-    // XXX WAR sm_10 issue
-    ValueType tmp = carry;
-    ldata[0] = binary_op(tmp, ldata[0]);
-  }
-
-  // scan local values
-  for(unsigned int k = 1; k < K; k++)
-  {
-    const unsigned int offset = K * context.thread_index() + k;
-
-    if (FullBlock || offset < n)
-      ldata[k] = binary_op(ldata[k-1],ldata[k]);
-  }
-
-  sdata[K - 1][context.thread_index()] = ldata[K - 1];
-
-  context.barrier();
-
-  // second level scan
-  if (FullBlock && sizeof(ValueType) > 1) // TODO investigate why this WAR is necessary
-    scan_block<CTA_SIZE>(context, sdata[K - 1], binary_op); 
-  else
-    scan_block_n<CTA_SIZE>(context, sdata[K - 1], n / K, binary_op);
-
-  // store carry out
-  if (FullBlock)
-  {
-     if (context.thread_index() == CTA_SIZE - 1)
-        carry = sdata[K - 1][context.thread_index()];
-  }
-  else
-  {
-    if (context.thread_index() == (n - 1) / K)
-    {
-      ValueType sum;
-
-      for (unsigned int k = 0; k < K; k++)
-          if ((n - 1) % K == k)
-              sum = ldata[k];
-
-      if (context.thread_index() > 0)
-      {
-        // WAR sm_10 issue
-        ValueType tmp = sdata[K - 1][context.thread_index() - 1];
-        sum = binary_op(tmp, sum);
-      }
-
-      carry = sum;
-    }
-  }
-
-  context.barrier();
-}
-
-template <unsigned int CTA_SIZE,
-          unsigned int K,
-          bool Inclusive,
-          bool FullBlock,
-          typename Context,
-          typename InputIterator,
-          typename OutputIterator,
-          typename BinaryFunction,
-          typename ValueType>
-__device__ __thrust_forceinline__
-void scan_body(Context context,
-               const unsigned int n,
-               const bool carry_in,
-               InputIterator input,
-               OutputIterator output,
-               BinaryFunction binary_op,
-               ValueType (&sdata)[K][CTA_SIZE + 1],
-               ValueType& carry)
-{
-  // read data
-  load_block<CTA_SIZE,K,FullBlock>(context, n, input, sdata);
-
-  // copy into local array
-  ValueType ldata[K];
-  for (unsigned int k = 0; k < K; k++)
-    ldata[k] = sdata[k][context.thread_index()];
-
-  // carry in
-  if (context.thread_index() == 0 && carry_in)
-  {
-    // XXX WAR sm_10 issue
-    ValueType tmp = carry;
-    ldata[0] = binary_op(tmp, ldata[0]);
-  }
-
-  // scan local values
-  for(unsigned int k = 1; k < K; k++)
-  {
-    const unsigned int offset = K * context.thread_index() + k;
-
-    if (FullBlock || offset < n)
-      ldata[k] = binary_op(ldata[k-1],ldata[k]);
-  }
-
-  sdata[K - 1][context.thread_index()] = ldata[K - 1];
-
-  context.barrier();
-
-  // second level scan
-  if (FullBlock)
-    scan_block<CTA_SIZE>(context, sdata[K - 1], binary_op);
-  else
-    scan_block_n<CTA_SIZE>(context, sdata[K - 1], n / K, binary_op);
-  
-  // update local values
-  if (context.thread_index() > 0)
-  {
-    ValueType left = sdata[K - 1][context.thread_index() - 1];
-
-    for(unsigned int k = 0; k < K; k++)
-    {
-      const unsigned int offset = K * context.thread_index() + k;
-
-      if (FullBlock || offset < n)
-        ldata[k] = binary_op(left, ldata[k]);
-    }
-  }
-
-  for (unsigned int k = 0; k < K; k++)
-    sdata[k][context.thread_index()] = ldata[k];
-
-  context.barrier();
-
-  // write data
-  store_block<CTA_SIZE, K, Inclusive, FullBlock>(context, n, output, sdata, carry);
-  
-  // store carry out
-  if (context.thread_index() == 0)
-  {
-    if (FullBlock)
-      carry = sdata[K - 1][CTA_SIZE - 1];
-    else
-      carry = sdata[(n - 1) % K][(n - 1) / K]; // note: this must come after the local update
-  }
-
-  context.barrier();
-}
-
-template <typename InputIterator,
-          typename ValueType,
-          typename BinaryFunction,
-          typename Decomposition,
-          typename Context>
-struct upsweep_intervals_closure
-{
-  InputIterator  input;
-  ValueType *    block_results; // TODO change this to ValueIterator
-  BinaryFunction binary_op;
-  Decomposition  decomp;
-  Context        context;
-  
-  typedef Context context_type;
-
-  upsweep_intervals_closure(InputIterator input,
-                            ValueType * block_results,
-                            BinaryFunction binary_op,
-                            Decomposition decomp,
-                            Context context = Context())
-    : input(input), block_results(block_results), binary_op(binary_op), decomp(decomp), context(context) {}
-
-  __device__ __thrust_forceinline__
-  void operator()(void)
-  {
-    typedef typename Decomposition::index_type  IndexType;
-
-    const unsigned int CTA_SIZE = context_type::ThreadsPerBlock::value;
-
-#if __CUDA_ARCH__ >= 200
-    const unsigned int SMEM = (48 * 1024);
-#else
-    const unsigned int SMEM = (16 * 1024) - 256;
-#endif
-    const unsigned int MAX_K = ((SMEM - 1 * sizeof(ValueType)) / (sizeof(ValueType) * (CTA_SIZE + 1)));
-    const unsigned int K     = (MAX_K < 6) ? MAX_K : 6;
-
-    __shared__ uninitialized<ValueType[K][CTA_SIZE + 1]> sdata; // padded to avoid bank conflicts
-    
-    __shared__ uninitialized<ValueType> carry; // storage for carry out
-    if(context.thread_index() == 0) carry.construct();
-    
-    context.barrier();
-    
-    thrust::system::detail::internal::index_range<IndexType> interval = decomp[context.block_index()];
-
-    IndexType base = interval.begin();
-
-    input += base;
-
-    const unsigned int unit_size = K * CTA_SIZE;
-
-    bool carry_in = false;
-
-    // process full units
-    while (base + unit_size <= interval.end())
-    {
-      const unsigned int n = unit_size;
-      upsweep_body<CTA_SIZE,K,true>(context, n, carry_in, input, binary_op, sdata.get(), carry.get());
-      base   += unit_size;
-      input  += unit_size;
-      carry_in = true;
-    }
-
-    // process partially full unit at end of input (if necessary)
-    if (base < interval.end())
-    {
-      const unsigned int n = interval.end() - base;
-      upsweep_body<CTA_SIZE,K,false>(context, n, carry_in, input, binary_op, sdata.get(), carry.get());
-    }
-
-    // write interval sum
-    if (context.thread_index() == 0)
-      block_results[context.block_index()] = carry;
-  }
-};
-
-
-template <bool Inclusive,
-          typename InputIterator,
-          typename OutputIterator,
-          typename ValueType,
-          typename BinaryFunction,
-          typename Decomposition,
-          typename Context>
-struct downsweep_intervals_closure
-{
-  InputIterator  input;
-  OutputIterator output;
-  ValueType *    block_results;
-  BinaryFunction binary_op;
-  Decomposition  decomp;
-  Context        context;
-
-  typedef Context context_type;
-
-  downsweep_intervals_closure(InputIterator input,
-                              OutputIterator output,
-                              ValueType * block_results,
-                              BinaryFunction binary_op,
-                              Decomposition decomp,
-                              Context context = Context())
-    : input(input), output(output), block_results(block_results), binary_op(binary_op), decomp(decomp), context(context) {}
-
-  __device__ __thrust_forceinline__
-  void operator()(void)
-  {
-    typedef typename Decomposition::index_type IndexType;
-    
-    const unsigned int CTA_SIZE = context_type::ThreadsPerBlock::value;
-
-#if __CUDA_ARCH__ >= 200
-    const unsigned int SMEM = (48 * 1024);
-#else
-    const unsigned int SMEM = (16 * 1024) - 256;
-#endif
-    const unsigned int MAX_K = ((SMEM - 1 * sizeof(ValueType))/ (sizeof(ValueType) * (CTA_SIZE + 1)));
-    const unsigned int K     = (MAX_K < 6) ? MAX_K : 6;
-
-    __shared__ uninitialized<ValueType[K][CTA_SIZE + 1]> sdata;  // padded to avoid bank conflicts
-    
-    __shared__ uninitialized<ValueType> carry; // storage for carry in and carry out
-    if(context.thread_index() == 0) carry.construct();
-
-    context.barrier();
-
-    thrust::system::detail::internal::index_range<IndexType> interval = decomp[context.block_index()];
-
-    IndexType base = interval.begin();
-
-    input  += base;
-    output += base;
-
-    const unsigned int unit_size = K * CTA_SIZE;
-
-    bool carry_in  = (Inclusive && context.block_index() == 0) ? false : true;
-
-    if (carry_in)
-    {
-      if (context.thread_index() == 0)
-        carry = block_results[context.block_index()];
-      context.barrier();
-    }
-
-    // process full units
-    while (base + unit_size <= interval.end())
-    {
-      const unsigned int n = unit_size;
-      scan_body<CTA_SIZE,K,Inclusive,true>(context, n, carry_in, input, output, binary_op, sdata.get(), carry.get());
-      base   += K * CTA_SIZE;
-      input  += K * CTA_SIZE;
-      output += K * CTA_SIZE;
-      carry_in = true;
-    }
-
-    // process partially full unit at end of input (if necessary)
-    if (base < interval.end())
-    {
-      const unsigned int n = interval.end() - base;
-      scan_body<CTA_SIZE,K,Inclusive,false>(context, n, carry_in, input, output, binary_op, sdata.get(), carry.get());
-    }
-  }
-};
-
-
-} // end namespace fast_scan_detail
-
-
-template <typename DerivedPolicy,
-          typename InputIterator,
-          typename OutputIterator,
-          typename BinaryFunction>
-OutputIterator inclusive_scan(execution_policy<DerivedPolicy> &exec,
-                              InputIterator first,
-                              InputIterator last,
-                              OutputIterator output,
-                              BinaryFunction binary_op)
-{
-  using namespace fast_scan_detail;
-
-  // the pseudocode for deducing the type of the temporary used below:
-  // 
-  // if BinaryFunction is AdaptableBinaryFunction
-  //   TemporaryType = AdaptableBinaryFunction::result_type
-  // else if OutputIterator is a "pure" output iterator
-  //   TemporaryType = InputIterator::value_type
-  // else
-  //   TemporaryType = OutputIterator::value_type
-  //
-  // XXX upon c++0x, TemporaryType needs to be:
-  // result_of<BinaryFunction>::type
-
-  typedef typename thrust::detail::eval_if<
-    thrust::detail::has_result_type<BinaryFunction>::value,
-    thrust::detail::result_type<BinaryFunction>,
-    thrust::detail::eval_if<
-      thrust::detail::is_output_iterator<OutputIterator>::value,
-      thrust::iterator_value<InputIterator>,
-      thrust::iterator_value<OutputIterator>
-    >
-  >::type ValueType;
-
-  typedef unsigned int                                                       IndexType;
-  typedef thrust::system::detail::internal::uniform_decomposition<IndexType> Decomposition;
-  typedef thrust::detail::temporary_array<ValueType,DerivedPolicy>           ValueArray;
-
-  if (first == last)
-      return output;
-
-  Decomposition decomp = thrust::system::cuda::detail::default_decomposition<IndexType>(last - first);
-
-  ValueArray block_results(exec, decomp.size());
-  
-  // compute sum over each interval
-  if (thrust::detail::is_commutative<BinaryFunction>::value)
-  {
-    // use reduce_intervals for commutative operators
-    thrust::system::cuda::detail::reduce_intervals(exec, first, block_results.begin(), binary_op, decomp);
-  }
-  else
-  {
-    const static unsigned int ThreadsPerBlock = inclusive_scan_block_size<ValueType>::pass1;
-    typedef detail::statically_blocked_thread_array<ThreadsPerBlock> Context;
-
-    typedef upsweep_intervals_closure<InputIterator,ValueType,BinaryFunction,Decomposition,Context> Closure;
-    Closure closure(first,
-                    thrust::raw_pointer_cast(&block_results[0]),
-                    binary_op,
-                    decomp);
-    detail::launch_closure(closure, decomp.size(), ThreadsPerBlock);
-  }
-
-  // second level inclusive scan of per-block results
-  {
-    const static unsigned int ThreadsPerBlock = inclusive_scan_block_size<ValueType>::pass2;
-    typedef detail::statically_blocked_thread_array<ThreadsPerBlock> Context;
-
-    typedef downsweep_intervals_closure<true,ValueType*,ValueType*,ValueType,BinaryFunction,Decomposition,Context> Closure;
-    Closure closure(thrust::raw_pointer_cast(&block_results[0]),
-                    thrust::raw_pointer_cast(&block_results[0]),
-                    thrust::raw_pointer_cast(&block_results[0]), // not used
-                    binary_op,
-                    Decomposition(decomp.size(), 1, 1));
-    detail::launch_closure(closure, 1, ThreadsPerBlock);
-  }
-  
-  // update intervals with result of second level scan
-  {
-    const static unsigned int ThreadsPerBlock = inclusive_scan_block_size<ValueType>::pass3;
-    typedef detail::statically_blocked_thread_array<ThreadsPerBlock> Context;
-
-    typedef downsweep_intervals_closure<true,InputIterator,OutputIterator,ValueType,BinaryFunction,Decomposition,Context> Closure;
-    Closure closure(first,
-                    output,
-                    thrust::raw_pointer_cast(&block_results[0]) - 1, // shift block results
-                    binary_op,
-                    decomp);
-    detail::launch_closure(closure, decomp.size(), ThreadsPerBlock);
-  }
-  
-  return output + (last - first);
-}
-
-
-template <typename DerivedPolicy,
-          typename InputIterator,
-          typename OutputIterator,
-          typename T,
-          typename BinaryFunction>
-OutputIterator exclusive_scan(execution_policy<DerivedPolicy> &exec,
-                              InputIterator first,
-                              InputIterator last,
-                              OutputIterator output,
-                              const T init,
-                              BinaryFunction binary_op)
-{
-  using namespace fast_scan_detail;
-
-  // the pseudocode for deducing the type of the temporary used below:
-  // 
-  // if BinaryFunction is AdaptableBinaryFunction
-  //   TemporaryType = AdaptableBinaryFunction::result_type
-  // else if OutputIterator is a "pure" output iterator
-  //   TemporaryType = InputIterator::value_type
-  // else
-  //   TemporaryType = OutputIterator::value_type
-  //
-  // XXX upon c++0x, TemporaryType needs to be:
-  // result_of<BinaryFunction>::type
-
-  typedef typename thrust::detail::eval_if<
-    thrust::detail::has_result_type<BinaryFunction>::value,
-    thrust::detail::result_type<BinaryFunction>,
-    thrust::detail::eval_if<
-      thrust::detail::is_output_iterator<OutputIterator>::value,
-      thrust::iterator_value<InputIterator>,
-      thrust::iterator_value<OutputIterator>
-    >
-  >::type ValueType;
-
-  typedef unsigned int                                                       IndexType;
-  typedef thrust::system::detail::internal::uniform_decomposition<IndexType> Decomposition;
-  typedef thrust::detail::temporary_array<ValueType,DerivedPolicy>           ValueArray;
-
-  if (first == last)
-      return output;
-
-  Decomposition decomp = thrust::system::cuda::detail::default_decomposition<IndexType>(last - first);
-
-  ValueArray block_results(exec, decomp.size() + 1);
-  
-  // compute sum over each interval
-  if (thrust::detail::is_commutative<BinaryFunction>::value)
-  {
-    // use reduce_intervals for commutative operators
-    thrust::system::cuda::detail::reduce_intervals(exec, first, block_results.begin() + 1, binary_op, decomp);
-  }
-  else
-  {
-    const static unsigned int ThreadsPerBlock = exclusive_scan_block_size<ValueType>::pass1;
-    typedef detail::statically_blocked_thread_array<ThreadsPerBlock> Context;
-
-    typedef upsweep_intervals_closure<InputIterator,ValueType,BinaryFunction,Decomposition,Context> Closure;
-    Closure closure(first,
-                    thrust::raw_pointer_cast(&block_results[0]) + 1,
-                    binary_op,
-                    decomp);
-    detail::launch_closure(closure, decomp.size(), ThreadsPerBlock);
-  }
-
-  // place init before per-block results
-  block_results[0] = init;
-  
-  // second level inclusive scan of per-block results
-  {
-    const static unsigned int ThreadsPerBlock = exclusive_scan_block_size<ValueType>::pass2;
-    typedef detail::statically_blocked_thread_array<ThreadsPerBlock> Context;
-
-    typedef downsweep_intervals_closure<true,ValueType*,ValueType*,ValueType,BinaryFunction,Decomposition,Context> Closure;
-    Closure closure(thrust::raw_pointer_cast(&block_results[0]),
-                    thrust::raw_pointer_cast(&block_results[0]),
-                    thrust::raw_pointer_cast(&block_results[0]), // not used
-                    binary_op,
-                    Decomposition(decomp.size() + 1, 1, 1));
-    detail::launch_closure(closure, 1, ThreadsPerBlock);
-  }
-  
-  // update intervals with result of second level scan
-  {
-    const static unsigned int ThreadsPerBlock = exclusive_scan_block_size<ValueType>::pass3;
-    typedef detail::statically_blocked_thread_array<ThreadsPerBlock> Context;
-
-    typedef downsweep_intervals_closure<false,InputIterator,OutputIterator,ValueType,BinaryFunction,Decomposition,Context> Closure;
-    Closure closure(first,
-                    output,
-                    thrust::raw_pointer_cast(&block_results[0]), // shift block results
-                    binary_op,
-                    decomp);
-    detail::launch_closure(closure, decomp.size(), ThreadsPerBlock);
-  }
-  
-  return output + (last - first);
-}
-
-
-} // end namespace fast_scan
-} // end namespace detail
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
-__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_END
-
diff --git a/compat/thrust/system/cuda/detail/detail/launch_calculator.h b/compat/thrust/system/cuda/detail/detail/launch_calculator.h
deleted file mode 100644
index 5126aa6f1a..0000000000
--- a/compat/thrust/system/cuda/detail/detail/launch_calculator.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-#include <thrust/system/cuda/detail/cuda_launch_config.h>
-#include <thrust/tuple.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-namespace detail
-{
-
-template <typename Closure>
-class launch_calculator
-{
-  device_properties_t   properties;
-  function_attributes_t attributes;
-
-  public:
-  
-  launch_calculator(void);
-
-  launch_calculator(const device_properties_t& properties, const function_attributes_t& attributes);
-
-  thrust::tuple<size_t,size_t,size_t> with_variable_block_size(void) const;
-
-  template <typename UnaryFunction>
-  thrust::tuple<size_t,size_t,size_t> with_variable_block_size(UnaryFunction block_size_to_smem_size) const;
-  
-  thrust::tuple<size_t,size_t,size_t> with_variable_block_size_available_smem(void) const;
-
-  private:
-
-  /*! Returns a pair (num_threads_per_block, num_blocks_per_multiprocessor)
-   *  where num_threads_per_block is a valid block size for an instance of Closure
-   *  chosen by a heuristic and num_blocks_per_multiprocessor is the maximum
-   *  number of such blocks that can execute on a streaming multiprocessor at once.
-   */
-  thrust::pair<size_t, size_t> default_block_configuration() const;
-
-  /*! Returns a pair (num_threads_per_block, num_blocks_per_multiprocessor)
-   *  where num_threads_per_block is a valid block size for an instance of Closure
-   *  chosen by a heuristic and num_blocks_per_multiprocessor is the maximum
-   *  number of such blocks that can execute on a streaming multiprocessor at once.
-   *
-   *  \param block_size_to_smem_size Mapping from num_threads_per_block to number of
-   *                                 dynamically-allocated bytes of shared memory
-   */
-  template<typename UnaryFunction>
-  thrust::pair<size_t, size_t> default_block_configuration(UnaryFunction block_size_to_smem_size) const;
-};
-
-} // end namespace detail
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/cuda/detail/detail/launch_calculator.inl>
-
diff --git a/compat/thrust/system/cuda/detail/detail/launch_calculator.inl b/compat/thrust/system/cuda/detail/detail/launch_calculator.inl
deleted file mode 100644
index b851d5fe13..0000000000
--- a/compat/thrust/system/cuda/detail/detail/launch_calculator.inl
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-// do not attempt to compile this file with any other compiler
-#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC
-
-#include <thrust/system/cuda/detail/runtime_introspection.h>
-#include <thrust/system/cuda/detail/cuda_launch_config.h>
-#include <thrust/system/cuda/detail/detail/launch_closure.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-namespace detail
-{
-
-template <typename Closure>
-launch_calculator<Closure>::launch_calculator(void)
-  : properties(device_properties()),
-    attributes(closure_attributes<Closure>())
-{}
-  
-template <typename Closure>
-launch_calculator<Closure>::launch_calculator(const device_properties_t& properties, const function_attributes_t& attributes)
-  : properties(properties),
-    attributes(attributes)
-{}
-
-template <typename Closure>
-  template <typename UnaryFunction>
-thrust::pair<size_t, size_t> launch_calculator<Closure>::default_block_configuration(UnaryFunction block_size_to_smem_size) const
-{
-  // choose a block size
-  std::size_t num_threads_per_block = block_size_with_maximum_potential_occupancy(attributes, properties, block_size_to_smem_size);
-
-  // choose a subscription rate
-  std::size_t num_blocks_per_multiprocessor = properties.maxThreadsPerMultiProcessor / num_threads_per_block;
-
-  return thrust::make_pair(num_threads_per_block, num_blocks_per_multiprocessor);
-}
-
-
-template <typename Closure>
-thrust::pair<size_t, size_t> launch_calculator<Closure>::default_block_configuration(void) const
-{
-  // choose a block size
-  std::size_t num_threads_per_block = block_size_with_maximum_potential_occupancy(attributes, properties);
-
-  // choose a subscription rate
-  std::size_t num_blocks_per_multiprocessor = properties.maxThreadsPerMultiProcessor / num_threads_per_block;
-
-  return thrust::make_pair(num_threads_per_block, num_blocks_per_multiprocessor);
-}
-
-template <typename Closure>
-thrust::tuple<size_t,size_t,size_t> launch_calculator<Closure>::with_variable_block_size(void) const
-{
-  thrust::pair<size_t, size_t> config = default_block_configuration();
-  return thrust::tuple<size_t,size_t,size_t>(config.second * properties.multiProcessorCount, config.first, 0);
-}
-
-template <typename Closure>
-  template <typename UnaryFunction>
-thrust::tuple<size_t,size_t,size_t> launch_calculator<Closure>::with_variable_block_size(UnaryFunction block_size_to_smem_size) const
-{
-  thrust::pair<size_t, size_t> config = default_block_configuration(block_size_to_smem_size);
-  return thrust::tuple<size_t,size_t,size_t>(config.second * properties.multiProcessorCount, config.first, block_size_to_smem_size(config.first));
-}
-  
-template <typename Closure>
-thrust::tuple<size_t,size_t,size_t> launch_calculator<Closure>::with_variable_block_size_available_smem(void) const
-{
-  thrust::pair<size_t, size_t> config = default_block_configuration();
-  size_t smem_per_block = proportional_smem_allocation(properties, attributes, config.second);
-  return thrust::tuple<size_t,size_t,size_t>(config.second * properties.multiProcessorCount, config.first, smem_per_block);
-}
-
-} // end detail
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
-#endif // THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC
-
diff --git a/compat/thrust/system/cuda/detail/detail/launch_closure.h b/compat/thrust/system/cuda/detail/detail/launch_closure.h
deleted file mode 100644
index c2e6c4344f..0000000000
--- a/compat/thrust/system/cuda/detail/detail/launch_closure.h
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/system/cuda/detail/cuda_launch_config.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-namespace detail
-{
-
-template <unsigned int _ThreadsPerBlock = 0,
-          unsigned int _BlocksPerMultiprocessor = 0>
-struct launch_bounds
-{
-  typedef thrust::detail::integral_constant<unsigned int, _ThreadsPerBlock>         ThreadsPerBlock;
-  typedef thrust::detail::integral_constant<unsigned int, _BlocksPerMultiprocessor> BlocksPerMultiprocessor;
-};
-
-struct thread_array : public launch_bounds<>
-{
-// CUDA built-in variables require nvcc
-#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC
-  __device__ __thrust_forceinline__ unsigned int thread_index(void) const { return threadIdx.x; }
-  __device__ __thrust_forceinline__ unsigned int thread_count(void) const { return blockDim.x * gridDim.x; } 
-#else
-  __device__ __thrust_forceinline__ unsigned int thread_index(void) const { return 0; }
-  __device__ __thrust_forceinline__ unsigned int thread_count(void) const { return 0; } 
-#endif // THRUST_DEVICE_COMPILER_NVCC
-};
-
-struct blocked_thread_array : public launch_bounds<>
-{
-// CUDA built-in variables require nvcc
-#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC
-  __device__ __thrust_forceinline__ unsigned int thread_index(void)    const { return threadIdx.x; }
-  __device__ __thrust_forceinline__ unsigned int block_dimension(void) const { return blockDim.x;  } 
-  __device__ __thrust_forceinline__ unsigned int block_index(void)     const { return blockIdx.x;  }
-  __device__ __thrust_forceinline__ unsigned int grid_dimension(void)  const { return gridDim.x;   }
-  __device__ __thrust_forceinline__ unsigned int linear_index(void)    const { return block_dimension() * block_index() + thread_index(); }
-  __device__ __thrust_forceinline__ void         barrier(void)               { __syncthreads();    }
-#else
-  __device__ __thrust_forceinline__ unsigned int thread_index(void)    const { return 0; }
-  __device__ __thrust_forceinline__ unsigned int block_dimension(void) const { return 0; }
-  __device__ __thrust_forceinline__ unsigned int block_index(void)     const { return 0; }
-  __device__ __thrust_forceinline__ unsigned int grid_dimension(void)  const { return 0; }
-  __device__ __thrust_forceinline__ unsigned int linear_index(void)    const { return 0; }
-  __device__ __thrust_forceinline__ void         barrier(void)               {           }
-#endif // THRUST_DEVICE_COMPILER_NVCC
-};
-
-template <unsigned int _ThreadsPerBlock>
-struct statically_blocked_thread_array : public launch_bounds<_ThreadsPerBlock,1>
-{
-// CUDA built-in variables require nvcc
-#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC
-  __device__ __thrust_forceinline__ unsigned int thread_index(void)    const { return threadIdx.x;      }
-  __device__ __thrust_forceinline__ unsigned int block_dimension(void) const { return _ThreadsPerBlock; } // minor optimization
-  __device__ __thrust_forceinline__ unsigned int block_index(void)     const { return blockIdx.x;       }
-  __device__ __thrust_forceinline__ unsigned int grid_dimension(void)  const { return gridDim.x;        }
-  __device__ __thrust_forceinline__ unsigned int linear_index(void)    const { return block_dimension() * block_index() + thread_index(); }
-  __device__ __thrust_forceinline__ void         barrier(void)               { __syncthreads();    }
-#else
-  __device__ __thrust_forceinline__ unsigned int thread_index(void)    const { return 0; }
-  __device__ __thrust_forceinline__ unsigned int block_dimension(void) const { return 0; }
-  __device__ __thrust_forceinline__ unsigned int block_index(void)     const { return 0; }
-  __device__ __thrust_forceinline__ unsigned int grid_dimension(void)  const { return 0; }
-  __device__ __thrust_forceinline__ unsigned int linear_index(void)    const { return 0; }
-  __device__ __thrust_forceinline__ void         barrier(void)               {           }
-#endif // THRUST_DEVICE_COMPILER_NVCC
-};
-
-template<typename Closure, typename Size1, typename Size2>
-  void launch_closure(Closure f, Size1 num_blocks, Size2 block_size);
-
-template<typename Closure, typename Size1, typename Size2, typename Size3>
-  void launch_closure(Closure f, Size1 num_blocks, Size2 block_size, Size3 smem_size);
-
-/*! Returns a copy of the cudaFuncAttributes structure
- *  that is associated with a given Closure
- */
-template <typename Closure>
-function_attributes_t closure_attributes(void);
-
-} // end namespace detail
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/cuda/detail/detail/launch_closure.inl>
-
diff --git a/compat/thrust/system/cuda/detail/detail/launch_closure.inl b/compat/thrust/system/cuda/detail/detail/launch_closure.inl
deleted file mode 100644
index ce39cfc136..0000000000
--- a/compat/thrust/system/cuda/detail/detail/launch_closure.inl
+++ /dev/null
@@ -1,207 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/minmax.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/detail/temporary_array.h>
-#include <thrust/system/cuda/detail/runtime_introspection.h>
-#include <thrust/system/cuda/detail/synchronize.h>
-#include <thrust/system/cuda/detail/detail/launch_calculator.h>
-#include <thrust/system/cuda/detail/execution_policy.h>
-
-namespace thrust
-{
-namespace detail
-{
-
-// XXX WAR circular inclusion problems with this forward declaration
-template<typename, typename> class temporary_array;
-
-} // end detail
-
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-namespace detail
-{
-
-#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC
-template<typename Closure>
-__global__ __launch_bounds__(Closure::context_type::ThreadsPerBlock::value, Closure::context_type::BlocksPerMultiprocessor::value)
-void launch_closure_by_value(Closure f)
-{
-  f();
-}
-
-template<typename Closure>
-__global__ __launch_bounds__(Closure::context_type::ThreadsPerBlock::value, Closure::context_type::BlocksPerMultiprocessor::value)
-void launch_closure_by_pointer(const Closure *f)
-{
-  // copy to registers
-  Closure f_reg = *f;
-  f_reg();
-}
-#else
-template<typename Closure>
-void launch_closure_by_value(Closure) {}
-
-template<typename Closure>
-void launch_closure_by_pointer(const Closure *) {}
-
-#endif // THRUST_DEVICE_COMPILER_NVCC
-
-template<typename Closure,
-         bool launch_by_value = sizeof(Closure) <= 256>
-  struct closure_launcher_base
-{
-  typedef void (*launch_function_t)(Closure); 
- 
-  static launch_function_t get_launch_function(void)
-  {
-    return launch_closure_by_value<Closure>;
-  }
-
-  template<typename Size1, typename Size2, typename Size3>
-  static void launch(Closure f, Size1 num_blocks, Size2 block_size, Size3 smem_size)
-  {
-#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC
-    if(num_blocks > 0)
-    {
-      launch_closure_by_value<<<(unsigned int) num_blocks, (unsigned int) block_size, (unsigned int) smem_size>>>(f);
-      synchronize_if_enabled("launch_closure_by_value");
-    }
-#endif // THRUST_DEVICE_COMPILER_NVCC
-  }
-}; // end closure_launcher_base
-
-
-template<typename Closure>
-  struct closure_launcher_base<Closure,false>
-{
-  typedef void (*launch_function_t)(const Closure *); 
- 
-  static launch_function_t get_launch_function(void)
-  {
-    return launch_closure_by_pointer<Closure>;
-  }
-
-  template<typename Size1, typename Size2, typename Size3>
-  static void launch(Closure f, Size1 num_blocks, Size2 block_size, Size3 smem_size)
-  {
-#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC
-    if(num_blocks > 0)
-    {
-      // use temporary storage for the closure
-      // XXX use of cuda::tag is too specific here
-      thrust::cuda::tag cuda_tag;
-      thrust::host_system_tag host_tag;
-      thrust::detail::temporary_array<Closure,thrust::cuda::tag> closure_storage(cuda_tag, host_tag, &f, &f + 1);
-
-      // launch
-      detail::launch_closure_by_pointer<<<(unsigned int) num_blocks, (unsigned int) block_size, (unsigned int) smem_size>>>((&closure_storage[0]).get());
-      synchronize_if_enabled("launch_closure_by_pointer");
-    }
-#endif // THRUST_DEVICE_COMPILER_NVCC
-  }
-};
-
-
-template<typename Closure>
-  struct closure_launcher
-    : public closure_launcher_base<Closure>
-{
-  typedef closure_launcher_base<Closure> super_t;
-  
-  static inline const device_properties_t& device_properties(void)
-  {
-    return device_properties();
-  }
-  
-  static inline function_attributes_t function_attributes(void)
-  {
-    return thrust::system::cuda::detail::function_attributes(super_t::get_launch_function());
-  }
-
-  template<typename Size1, typename Size2, typename Size3>
-  static void launch(Closure f, Size1 num_blocks, Size2 block_size, Size3 smem_size)
-  {
-    super_t::launch(f,num_blocks,block_size,smem_size);
-  }
-};
-
-template<typename Closure, typename Size>
-  void launch_closure(Closure f, Size num_blocks)
-{
-  launch_calculator<Closure> calculator;
-  launch_closure(f, num_blocks, thrust::get<1>(calculator.with_variable_block_size()));
-} // end launch_closure()
-
-template<typename Closure, typename Size1, typename Size2>
-  void launch_closure(Closure f, Size1 num_blocks, Size2 block_size)
-{
-  launch_closure(f, num_blocks, block_size, 0u);
-} // end launch_closure()
-
-template<typename Closure, typename Size1, typename Size2, typename Size3>
-  void launch_closure(Closure f, Size1 num_blocks, Size2 block_size, Size3 smem_size)
-{
-  closure_launcher<Closure>::launch(f, num_blocks, block_size, smem_size);
-} // end launch_closure()
-
-  
-template <typename Closure>
-function_attributes_t closure_attributes(void)
-{
-  typedef closure_launcher<Closure> Launcher;
-
-  // cache the result of function_attributes(), because it is slow
-  // only cache the first few devices
-  static const int max_num_devices                                  = 16;
-
-  static bool attributes_exist[max_num_devices]                     = {0};
-  static function_attributes_t function_attributes[max_num_devices] = {};
-
-  // XXX device_id ought to be an argument to this function
-  int device_id = current_device();
-
-  if(device_id >= max_num_devices)
-  {
-    return thrust::system::cuda::detail::function_attributes(Launcher::get_launch_function());
-  }
-
-  if(!attributes_exist[device_id])
-  {
-    function_attributes[device_id] = thrust::system::cuda::detail::function_attributes(Launcher::get_launch_function());
-
-    // disallow the compiler to move the write to attributes_exist[device_id]
-    // before the initialization of function_attributes[device_id]
-    __thrust_compiler_fence();
-
-    attributes_exist[device_id] = true;
-  }
-
-  return function_attributes[device_id];
-}
-
-} // end namespace detail
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cuda/detail/detail/set_operation.h b/compat/thrust/system/cuda/detail/detail/set_operation.h
deleted file mode 100644
index 5475731edb..0000000000
--- a/compat/thrust/system/cuda/detail/detail/set_operation.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cuda/detail/execution_policy.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-namespace detail
-{
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename RandomAccessIterator3,
-         typename Compare,
-         typename SetOperation>
-  RandomAccessIterator3 set_operation(execution_policy<DerivedPolicy> &exec,
-                                      RandomAccessIterator1 first1,
-                                      RandomAccessIterator1 last1,
-                                      RandomAccessIterator2 first2,
-                                      RandomAccessIterator2 last2,
-                                      RandomAccessIterator3 result,
-                                      Compare comp,
-                                      SetOperation set_op);
-
-
-} // end detail
-} // end detail
-} // end cuda
-} // end system
-} // end thrust
-
-#include <thrust/system/cuda/detail/detail/set_operation.inl>
-
diff --git a/compat/thrust/system/cuda/detail/detail/set_operation.inl b/compat/thrust/system/cuda/detail/detail/set_operation.inl
deleted file mode 100644
index 3f14379628..0000000000
--- a/compat/thrust/system/cuda/detail/detail/set_operation.inl
+++ /dev/null
@@ -1,639 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cuda/detail/detail/set_operation.h>
-#include <thrust/system/cuda/detail/detail/balanced_path.h>
-#include <thrust/system/cuda/detail/block/inclusive_scan.h>
-#include <thrust/system/cuda/detail/block/exclusive_scan.h>
-#include <thrust/system/cuda/detail/block/copy.h>
-#include <thrust/iterator/counting_iterator.h>
-#include <thrust/transform.h>
-#include <thrust/scan.h>
-#include <thrust/pair.h>
-#include <thrust/detail/util/blocking.h>
-#include <thrust/detail/temporary_array.h>
-#include <thrust/detail/cstdint.h>
-#include <thrust/detail/minmax.h>
-
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-namespace detail
-{
-namespace set_operation_detail
-{
-
-
-using thrust::system::cuda::detail::detail::statically_blocked_thread_array;
-using thrust::detail::uint16_t;
-using thrust::detail::uint32_t;
-
-
-// empirically determined on sm_20
-// value_types larger than this will fail to launch if placed in smem
-template<typename T>
-  struct stage_through_smem
-{
-  static const bool value = sizeof(T) <= 6 * sizeof(uint32_t);
-};
-
-
-// max_input_size <= 32
-template<typename Size, typename InputIterator, typename OutputIterator>
-inline __device__
-  OutputIterator serial_bounded_copy_if(Size max_input_size,
-                                        InputIterator first,
-                                        uint32_t mask,
-                                        OutputIterator result)
-{
-  for(Size i = 0; i < max_input_size; ++i, ++first)
-  {
-    if((1<<i) & mask)
-    {
-      *result = *first;
-      ++result;
-    }
-  }
-
-  return result;
-}
-
-
-template<typename Size, typename InputIterator1, typename InputIterator2, typename Compare>
-  struct find_partition_offsets_functor
-{
-  Size partition_size;
-  InputIterator1 first1;
-  InputIterator2 first2;
-  Size n1, n2;
-  Compare comp;
-
-  find_partition_offsets_functor(Size partition_size,
-                                 InputIterator1 first1, InputIterator1 last1,
-                                 InputIterator2 first2, InputIterator2 last2,
-                                 Compare comp)
-    : partition_size(partition_size),
-      first1(first1), first2(first2),
-      n1(last1 - first1), n2(last2 - first2),
-      comp(comp)
-  {}
-
-  inline __host__ __device__
-  thrust::pair<Size,Size> operator()(Size i) const
-  {
-    Size diag = thrust::min(n1 + n2, i * partition_size);
-
-    // XXX the correctness of balanced_path depends critically on the ll suffix below
-    //     why???
-    return balanced_path(first1, n1, first2, n2, diag, 4ll, comp);
-  }
-};
-
-
-template<typename Size, typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename OutputIterator, typename Compare>
-  OutputIterator find_partition_offsets(thrust::cuda::execution_policy<DerivedPolicy> &exec,
-                                        Size num_partitions,
-                                        Size partition_size,
-                                        InputIterator1 first1, InputIterator1 last1,
-                                        InputIterator2 first2, InputIterator2 last2,
-                                        OutputIterator result,
-                                        Compare comp)
-{
-  find_partition_offsets_functor<Size,InputIterator1,InputIterator2,Compare> f(partition_size, first1, last1, first2, last2, comp);
-
-  return thrust::transform(exec,
-                           thrust::counting_iterator<Size>(0),
-                           thrust::counting_iterator<Size>(num_partitions),
-                           result,
-                           f);
-}
-
-
-namespace block
-{
-
-
-template<unsigned int block_size, typename T>
-inline __device__
-T right_neighbor(statically_blocked_thread_array<block_size> &ctx, const T &x, const T &boundary)
-{
-  // stage this shift to conserve smem
-  const unsigned int storage_size = block_size / 2;
-  __shared__ uninitialized_array<T,storage_size> shared;
-
-  T result = x;
-
-  unsigned int tid = ctx.thread_index();
-
-  if(0 < tid && tid <= storage_size)
-  {
-    shared[tid - 1] = x;
-  }
-
-  ctx.barrier();
-
-  if(tid < storage_size)
-  {
-    result = shared[tid];
-  }
-
-  ctx.barrier();
-  
-  tid -= storage_size;
-  if(0 < tid && tid <= storage_size)
-  {
-    shared[tid - 1] = x;
-  }
-  else if(tid == 0)
-  {
-    shared[storage_size-1] = boundary;
-  }
-
-  ctx.barrier();
-
-  if(tid < storage_size)
-  {
-    result = shared[tid];
-  }
-
-  ctx.barrier();
-
-  return result;
-}
-
-
-template<uint16_t block_size, uint16_t work_per_thread, typename InputIterator1, typename InputIterator2, typename Compare, typename SetOperation>
-inline __device__
-  unsigned int bounded_count_set_operation_n(statically_blocked_thread_array<block_size> &ctx,
-                                             InputIterator1 first1, uint16_t n1,
-                                             InputIterator2 first2, uint16_t n2,
-                                             Compare comp,
-                                             SetOperation set_op)
-{
-  unsigned int thread_idx = ctx.thread_index();
-
-  // find partition offsets
-  uint16_t diag = thrust::min<uint16_t>(n1 + n2, thread_idx * work_per_thread);
-  thrust::pair<uint16_t,uint16_t> thread_input_begin = balanced_path(first1, n1, first2, n2, diag, 2, comp);
-  thrust::pair<uint16_t,uint16_t> thread_input_end   = block::right_neighbor<block_size>(ctx, thread_input_begin, thrust::make_pair(n1,n2));
-
-  __shared__ uint16_t s_thread_output_size[block_size];
-
-  // work_per_thread + 1 to accomodate a "starred" partition returned from balanced_path above
-  s_thread_output_size[thread_idx] =
-    set_op.count(work_per_thread + 1,
-                 first1 + thread_input_begin.first,  first1 + thread_input_end.first,
-                 first2 + thread_input_begin.second, first2 + thread_input_end.second,
-                 comp);
-
-  ctx.barrier();
-
-  // reduce per-thread counts
-  thrust::system::cuda::detail::block::inplace_inclusive_scan(ctx, s_thread_output_size);
-  return s_thread_output_size[ctx.block_dimension() - 1];
-}
-
-
-inline __device__ int pop_count(unsigned int x)
-{
-// guard use of __popc from other compilers
-#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC
-  return __popc(x);
-#else
-  return x;
-#endif
-}
-
-
-
-template<uint16_t block_size, uint16_t work_per_thread, typename InputIterator1, typename InputIterator2, typename OutputIterator, typename Compare, typename SetOperation>
-inline __device__
-  OutputIterator bounded_set_operation_n(statically_blocked_thread_array<block_size> &ctx,
-                                         InputIterator1 first1, uint16_t n1,
-                                         InputIterator2 first2, uint16_t n2,
-                                         OutputIterator result,
-                                         Compare comp,
-                                         SetOperation set_op)
-{
-  unsigned int thread_idx = ctx.thread_index();
-  
-  // find partition offsets
-  uint16_t diag = thrust::min<uint16_t>(n1 + n2, thread_idx * work_per_thread);
-  thrust::pair<uint16_t,uint16_t> thread_input_begin = balanced_path(first1, n1, first2, n2, diag, 2, comp);
-  thrust::pair<uint16_t,uint16_t> thread_input_end   = block::right_neighbor<block_size>(ctx, thread_input_begin, thrust::make_pair(n1,n2));
-
-  typedef typename thrust::iterator_value<InputIterator1>::type value_type;
-  // +1 to accomodate a "starred" partition returned from balanced_path above
-  uninitialized_array<value_type, work_per_thread + 1> sparse_result;
-  uint32_t active_mask =
-    set_op(work_per_thread + 1,
-           first1 + thread_input_begin.first,  first1 + thread_input_end.first,
-           first2 + thread_input_begin.second, first2 + thread_input_end.second,
-           sparse_result.begin(),
-           comp);
-
-  __shared__ uint16_t s_thread_output_size[block_size];
-  s_thread_output_size[thread_idx] = pop_count(active_mask);
-
-  ctx.barrier();
-
-  // scan to turn per-thread counts into output indices
-  uint16_t block_output_size = thrust::system::cuda::detail::block::inplace_exclusive_scan(ctx, s_thread_output_size, 0u);
-
-  serial_bounded_copy_if(work_per_thread + 1, sparse_result.begin(), active_mask, result + s_thread_output_size[thread_idx]);
-
-  ctx.barrier();
-
-  return result + block_output_size;
-}
-
-
-template<uint16_t block_size, uint16_t work_per_thread, typename InputIterator1, typename InputIterator2, typename Compare, typename SetOperation>
-inline __device__
-  typename thrust::iterator_difference<InputIterator1>::type
-    count_set_operation(statically_blocked_thread_array<block_size> &ctx,
-                        InputIterator1 first1, InputIterator1 last1,
-                        InputIterator2 first2, InputIterator2 last2,
-                        Compare comp,
-                        SetOperation set_op)
-{
-  typedef typename thrust::iterator_difference<InputIterator1>::type difference;
-
-  difference result = 0;
-
-  thrust::pair<difference,difference> remaining_input_size = thrust::make_pair(last1 - first1, last2 - first2);
-
-  // iterate until the input is consumed
-  while(remaining_input_size.first + remaining_input_size.second > 0)
-  {
-    // find the end of this subpartition's input
-    // -1 to accomodate "starred" partitions
-    uint16_t max_subpartition_size = block_size * work_per_thread - 1;
-    difference diag = thrust::min<difference>(remaining_input_size.first + remaining_input_size.second, max_subpartition_size);
-    thrust::pair<uint16_t,uint16_t> subpartition_size = balanced_path(first1, remaining_input_size.first, first2, remaining_input_size.second, diag, 4ll, comp);
-  
-    typedef typename thrust::iterator_value<InputIterator2>::type value_type;
-    if(stage_through_smem<value_type>::value)
-    {
-      // load the input into __shared__ storage
-      __shared__ uninitialized_array<value_type, block_size * work_per_thread> s_input;
-  
-      value_type *s_input_end1 = thrust::system::cuda::detail::block::copy_n(ctx, first1, subpartition_size.first,  s_input.begin());
-      value_type *s_input_end2 = thrust::system::cuda::detail::block::copy_n(ctx, first2, subpartition_size.second, s_input_end1);
-  
-      result += block::bounded_count_set_operation_n<block_size,work_per_thread>(ctx,
-                                                                                 s_input.begin(), subpartition_size.first,
-                                                                                 s_input_end1,    subpartition_size.second,
-                                                                                 comp,
-                                                                                 set_op);
-    }
-    else
-    {
-      result += block::bounded_count_set_operation_n<block_size,work_per_thread>(ctx,
-                                                                                 first1, subpartition_size.first,
-                                                                                 first2, subpartition_size.second,
-                                                                                 comp,
-                                                                                 set_op);
-    }
-
-    // advance input
-    first1 += subpartition_size.first;
-    first2 += subpartition_size.second;
-
-    // decrement remaining size
-    remaining_input_size.first  -= subpartition_size.first;
-    remaining_input_size.second -= subpartition_size.second;
-  }
-
-  return result;
-}
-
-
-template<uint16_t block_size, uint16_t work_per_thread, typename InputIterator1, typename InputIterator2, typename OutputIterator, typename Compare, typename SetOperation>
-inline __device__
-OutputIterator set_operation(statically_blocked_thread_array<block_size> &ctx,
-                             InputIterator1 first1, InputIterator1 last1,
-                             InputIterator2 first2, InputIterator2 last2,
-                             OutputIterator result,
-                             Compare comp,
-                             SetOperation set_op)
-{
-  typedef typename thrust::iterator_difference<InputIterator1>::type difference;
-
-  thrust::pair<difference,difference> remaining_input_size = thrust::make_pair(last1 - first1, last2 - first2);
-
-  // iterate until the input is consumed
-  while(remaining_input_size.first + remaining_input_size.second > 0)
-  {
-    // find the end of this subpartition's input
-    // -1 to accomodate "starred" partitions
-    uint16_t max_subpartition_size = block_size * work_per_thread - 1;
-    difference diag = thrust::min<difference>(remaining_input_size.first + remaining_input_size.second, max_subpartition_size);
-    thrust::pair<uint16_t,uint16_t> subpartition_size = balanced_path(first1, remaining_input_size.first, first2, remaining_input_size.second, diag, 4ll, comp);
-    
-    typedef typename thrust::iterator_value<InputIterator2>::type value_type;
-    if(stage_through_smem<value_type>::value)
-    {
-      // load the input into __shared__ storage
-      __shared__ uninitialized_array<value_type, block_size * work_per_thread> s_input;
-  
-      value_type *s_input_end1 = thrust::system::cuda::detail::block::copy_n(ctx, first1, subpartition_size.first,  s_input.begin());
-      value_type *s_input_end2 = thrust::system::cuda::detail::block::copy_n(ctx, first2, subpartition_size.second, s_input_end1);
-  
-      result = block::bounded_set_operation_n<block_size,work_per_thread>(ctx,
-                                                                          s_input.begin(), subpartition_size.first,
-                                                                          s_input_end1,    subpartition_size.second,
-                                                                          result,
-                                                                          comp,
-                                                                          set_op);
-    }
-    else
-    {
-      result = block::bounded_set_operation_n<block_size,work_per_thread>(ctx,
-                                                                          first1, subpartition_size.first,
-                                                                          first2, subpartition_size.second,
-                                                                          result,
-                                                                          comp,
-                                                                          set_op);
-    }
-  
-    // advance input
-    first1 += subpartition_size.first;
-    first2 += subpartition_size.second;
-
-    // decrement remaining size
-    remaining_input_size.first  -= subpartition_size.first;
-    remaining_input_size.second -= subpartition_size.second;
-  }
-
-  return result;
-}
-
-
-} // end namespace block
-
-
-template<uint16_t threads_per_block, uint16_t work_per_thread, typename InputIterator1, typename Size, typename InputIterator2, typename InputIterator3, typename OutputIterator, typename Compare, typename SetOperation>
-  inline __device__ void count_set_operation(statically_blocked_thread_array<threads_per_block> &ctx,
-                                             InputIterator1                                      input_partition_offsets,
-                                             Size                                                num_partitions,
-                                             InputIterator2                                      first1,
-                                             InputIterator3                                      first2,
-                                             OutputIterator                                      result,
-                                             Compare                                             comp,
-                                             SetOperation                                        set_op)
-{
-  // consume partitions
-  for(Size partition_idx = ctx.block_index();
-      partition_idx < num_partitions;
-      partition_idx += ctx.grid_dimension())
-  {
-    typedef typename thrust::iterator_difference<InputIterator2>::type difference;
-
-    // find the partition
-    thrust::pair<difference,difference> block_input_begin = input_partition_offsets[partition_idx];
-    thrust::pair<difference,difference> block_input_end   = input_partition_offsets[partition_idx + 1];
-
-    // count the size of the set operation
-    difference count = block::count_set_operation<threads_per_block,work_per_thread>(ctx,
-                                                                                     first1 + block_input_begin.first,  first1 + block_input_end.first,
-                                                                                     first2 + block_input_begin.second, first2 + block_input_end.second,
-                                                                                     comp,
-                                                                                     set_op);
-
-    if(ctx.thread_index() == 0)
-    {
-      result[partition_idx] = count;
-    }
-  }
-}
-
-
-template<uint16_t threads_per_block, uint16_t work_per_thread, typename InputIterator1, typename Size, typename InputIterator2, typename InputIterator3, typename OutputIterator, typename Compare, typename SetOperation>
-  struct count_set_operation_closure
-{
-  typedef statically_blocked_thread_array<threads_per_block> context_type;
-
-  InputIterator1 input_partition_offsets;
-  Size           num_partitions;
-  InputIterator2 first1;
-  InputIterator3 first2;
-  OutputIterator result;
-  Compare        comp;
-  SetOperation   set_op;
-
-  count_set_operation_closure(InputIterator1 input_partition_offsets,
-                              Size           num_partitions,
-                              InputIterator2 first1,
-                              InputIterator3 first2,
-                              OutputIterator result,
-                              Compare        comp,
-                              SetOperation   set_op)
-    : input_partition_offsets(input_partition_offsets),
-      num_partitions(num_partitions),
-      first1(first1),
-      first2(first2),
-      result(result),
-      comp(comp),
-      set_op(set_op)
-  {}
-
-  inline __device__ void operator()() const
-  {
-    context_type ctx;
-    count_set_operation<threads_per_block,work_per_thread>(ctx, input_partition_offsets, num_partitions, first1, first2, result, comp, set_op);
-  }
-};
-
-
-template<uint16_t threads_per_block, uint16_t work_per_thread, typename InputIterator1, typename Size, typename InputIterator2, typename InputIterator3, typename OutputIterator, typename Compare, typename SetOperation>
-  count_set_operation_closure<threads_per_block,work_per_thread,InputIterator1,Size,InputIterator2,InputIterator3,OutputIterator,Compare,SetOperation>
-    make_count_set_operation_closure(InputIterator1 input_partition_offsets,
-                                     Size           num_partitions,
-                                     InputIterator2 first1,
-                                     InputIterator3 first2,
-                                     OutputIterator result,
-                                     Compare        comp,
-                                     SetOperation   set_op)
-{
-  typedef count_set_operation_closure<threads_per_block,work_per_thread,InputIterator1,Size,InputIterator2,InputIterator3,OutputIterator,Compare,SetOperation> result_type;
-  return result_type(input_partition_offsets,num_partitions,first1,first2,result,comp,set_op);
-}
-
-
-template<uint16_t threads_per_block, uint16_t work_per_thread, typename InputIterator1, typename Size, typename InputIterator2, typename InputIterator3, typename InputIterator4, typename OutputIterator, typename Compare, typename SetOperation>
-inline __device__
-  void set_operation(statically_blocked_thread_array<threads_per_block> &ctx,
-                     InputIterator1                                      input_partition_offsets,
-                     Size                                                num_partitions,
-                     InputIterator2                                      first1,
-                     InputIterator3                                      first2,
-                     InputIterator4                                      output_partition_offsets,
-                     OutputIterator                                      result,
-                     Compare                                             comp,
-                     SetOperation                                        set_op)
-{
-  // consume partitions
-  for(Size partition_idx = ctx.block_index();
-      partition_idx < num_partitions;
-      partition_idx += ctx.grid_dimension())
-  {
-    typedef typename thrust::iterator_difference<InputIterator2>::type difference;
-
-    // find the partition
-    thrust::pair<difference,difference> block_input_begin = input_partition_offsets[partition_idx];
-    thrust::pair<difference,difference> block_input_end   = input_partition_offsets[partition_idx + 1];
-
-    // do the set operation across the partition
-    block::set_operation<threads_per_block,work_per_thread>(ctx,
-                                                            first1 + block_input_begin.first,  first1 + block_input_end.first,
-                                                            first2 + block_input_begin.second, first2 + block_input_end.second,
-                                                            result + output_partition_offsets[partition_idx],
-                                                            comp,
-                                                            set_op);
-  }
-}
-
-
-template<uint16_t threads_per_block, uint16_t work_per_thread, typename InputIterator1, typename Size, typename InputIterator2, typename InputIterator3, typename InputIterator4, typename OutputIterator, typename Compare, typename SetOperation>
-  struct set_operation_closure
-{
-  typedef statically_blocked_thread_array<threads_per_block> context_type;
-
-  InputIterator1 input_partition_offsets;
-  Size           num_partitions;
-  InputIterator2 first1;
-  InputIterator3 first2;
-  InputIterator4 output_partition_offsets;
-  OutputIterator result;
-  Compare        comp;
-  SetOperation   set_op;
-
-  set_operation_closure(InputIterator1 input_partition_offsets,
-                        Size           num_partitions,
-                        InputIterator2 first1,
-                        InputIterator3 first2,
-                        InputIterator4 output_partition_offsets,
-                        OutputIterator result,
-                        Compare        comp,
-                        SetOperation   set_op)
-    : input_partition_offsets(input_partition_offsets),
-      num_partitions(num_partitions),
-      first1(first1),
-      first2(first2),
-      output_partition_offsets(output_partition_offsets),
-      result(result),
-      comp(comp),
-      set_op(set_op)
-  {}
-
-  inline __device__ void operator()() const
-  {
-    context_type ctx;
-    set_operation<threads_per_block,work_per_thread>(ctx, input_partition_offsets, num_partitions, first1, first2, output_partition_offsets, result, comp, set_op);
-  }
-};
-
-
-template<uint16_t threads_per_block, uint16_t work_per_thread, typename InputIterator1, typename Size, typename InputIterator2, typename InputIterator3, typename InputIterator4, typename OutputIterator, typename Compare, typename SetOperation>
-  set_operation_closure<threads_per_block,work_per_thread,InputIterator1,Size,InputIterator2,InputIterator3,InputIterator4,OutputIterator,Compare,SetOperation>
-    make_set_operation_closure(InputIterator1 input_partition_offsets,
-                               Size           num_partitions,
-                               InputIterator2 first1,
-                               InputIterator3 first2,
-                               InputIterator4 output_partition_offsets,
-                               OutputIterator result,
-                               Compare        comp,
-                               SetOperation   set_op)
-{
-  typedef set_operation_closure<threads_per_block,work_per_thread,InputIterator1,Size,InputIterator2,InputIterator3,InputIterator4,OutputIterator,Compare,SetOperation> result_type;
-  return result_type(input_partition_offsets,num_partitions,first1,first2,output_partition_offsets,result,comp,set_op);
-}
-
-
-} // end namespace set_operation_detail
-
-
-template<typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename OutputIterator, typename Compare, typename SetOperation>
-  OutputIterator set_operation(thrust::cuda::execution_policy<DerivedPolicy> &exec,
-                               InputIterator1 first1, InputIterator1 last1,
-                               InputIterator2 first2, InputIterator2 last2,
-                               OutputIterator result,
-                               Compare comp,
-                               SetOperation set_op)
-{
-  using thrust::system::cuda::detail::device_properties;
-  using thrust::system::cuda::detail::detail::launch_closure;
-  namespace d = thrust::system::cuda::detail::detail::set_operation_detail;
-
-  typedef typename thrust::iterator_difference<InputIterator1>::type difference;
-
-  const difference n1 = last1 - first1;
-  const difference n2 = last2 - first2;
-
-  // handle empty input
-  if(n1 == 0 && n2 == 0)
-  {
-    return result;
-  }
-
-  const thrust::detail::uint16_t work_per_thread   = 15;
-  const thrust::detail::uint16_t threads_per_block = 128;
-  const thrust::detail::uint16_t work_per_block    = threads_per_block * work_per_thread;
-
-  // -1 because balanced_path adds a single element to the end of a "starred" partition, increasing its size by one
-  const thrust::detail::uint16_t maximum_partition_size = work_per_block - 1;
-  const difference num_partitions = thrust::detail::util::divide_ri(n1 + n2, maximum_partition_size);
-
-  // find input partition offsets
-  // +1 to handle the end of the input elegantly
-  thrust::detail::temporary_array<thrust::pair<difference,difference>, DerivedPolicy> input_partition_offsets(0, exec, num_partitions + 1);
-  d::find_partition_offsets<difference>(exec, input_partition_offsets.size(), maximum_partition_size, first1, last1, first2, last2, input_partition_offsets.begin(), comp);
-
-  const difference num_blocks = thrust::min<difference>(device_properties().maxGridSize[0], num_partitions);
-
-  // find output partition offsets
-  // +1 to store the total size of the total
-  thrust::detail::temporary_array<difference, DerivedPolicy> output_partition_offsets(0, exec, num_partitions + 1);
-  launch_closure(d::make_count_set_operation_closure<threads_per_block,work_per_thread>(input_partition_offsets.begin(), num_partitions, first1, first2, output_partition_offsets.begin(), comp, set_op),
-                 num_blocks,
-                 threads_per_block);
-
-  // turn the output partition counts into offsets to output partitions
-  thrust::exclusive_scan(exec, output_partition_offsets.begin(), output_partition_offsets.end(), output_partition_offsets.begin());
-
-  // run the set op kernel
-  launch_closure(d::make_set_operation_closure<threads_per_block,work_per_thread>(input_partition_offsets.begin(), num_partitions, first1, first2, output_partition_offsets.begin(), result, comp, set_op),
-                 num_blocks,
-                 threads_per_block);
-
-  return result + output_partition_offsets[num_partitions];
-}
-
-
-} // end namespace detail
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cuda/detail/detail/stable_merge_sort.h b/compat/thrust/system/cuda/detail/detail/stable_merge_sort.h
deleted file mode 100644
index 23f32545af..0000000000
--- a/compat/thrust/system/cuda/detail/detail/stable_merge_sort.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file stable_merge_sort_dev.h
- *  \brief Defines the interface for a stable merge implementation on CUDA
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cuda/detail/execution_policy.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-namespace detail
-{
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator,
-         typename StrictWeakOrdering>
-void stable_merge_sort(execution_policy<DerivedPolicy> &exec,
-                       RandomAccessIterator first,
-                       RandomAccessIterator last,
-                       StrictWeakOrdering comp);
-    
-template<typename DerivedPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename StrictWeakOrdering>
-void stable_merge_sort_by_key(execution_policy<DerivedPolicy> &exec,
-                              RandomAccessIterator1 keys_begin,
-                              RandomAccessIterator1 keys_end,
-                              RandomAccessIterator2 values_begin,
-                              StrictWeakOrdering comp);
-
-} // end namespace detail
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/cuda/detail/detail/stable_merge_sort.inl>
-
diff --git a/compat/thrust/system/cuda/detail/detail/stable_merge_sort.inl b/compat/thrust/system/cuda/detail/detail/stable_merge_sort.inl
deleted file mode 100644
index 0c69803294..0000000000
--- a/compat/thrust/system/cuda/detail/detail/stable_merge_sort.inl
+++ /dev/null
@@ -1,1103 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file stable_merge_sort.inl
- *  \brief Inline file for stable_merge_sort.h.
- *  \note This algorithm is based on the one described
- *        in "Designing Efficient Sorting Algorithms for
- *        Manycore GPUs", by Satish, Harris, and Garland.
- */
-
-#include <thrust/detail/config.h>
-
-#include <thrust/functional.h>
-#include <thrust/detail/copy.h>
-
-#include <thrust/detail/function.h>
-
-#include <thrust/detail/mpl/math.h> // for log2<N>
-#include <thrust/detail/util/blocking.h>
-#include <thrust/iterator/iterator_traits.h>
-
-#include <thrust/system/cuda/detail/detail/stable_sort_by_count.h>
-#include <thrust/system/cuda/detail/runtime_introspection.h>
-#include <thrust/detail/temporary_array.h>
-#include <thrust/system/cuda/detail/detail/launch_closure.h>
-#include <thrust/system/cuda/detail/detail/uninitialized.h>
-#include <thrust/system/cuda/detail/detail/cached_temporary_allocator.h>
-#include <thrust/system/cuda/detail/block/merge.h>
-#include <thrust/system/cuda/detail/block/copy.h>
-#include <thrust/pair.h>
-#include <thrust/tuple.h>
-#include <thrust/detail/minmax.h>
-#include <thrust/iterator/counting_iterator.h>
-#include <thrust/iterator/transform_iterator.h>
-#include <thrust/gather.h>
-
-__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_BEGIN
-
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-namespace detail
-{
-namespace stable_merge_sort_detail
-{
-
-
-template<unsigned int log_block_size, typename Key, typename Value>
-  struct is_block_size_valid
-{
-  // assume sm_10 limits
-  static const unsigned int max_num_smem_bytes = 16384;
-
-  // CUDA steals 256 for itself for kernel parms
-  static const unsigned int num_reserved_smem_bytes = 256;
-
-  // the number of bytes available to our kernels
-  static const unsigned int num_available_smem_bytes = max_num_smem_bytes - num_reserved_smem_bytes;
-
-  // merge_small_tiles_by_key_closure is the hungriest kernel
-  // the block_size it uses is 2x the size of all the other kernels
-  // this merge_small_tiles_by_key_closure's smem requirements:
-  //   2 * block_size_x2 * sizeof(Key)
-  // + 2 * block_size_x2 * sizeof(Key)
-  // + 2 * block_size_x2 * sizeof(Value)
-  // ================================
-  // 4 * (block_size) * (2 * sizeof(Key) + sizeof(Value))
-  static const unsigned int num_needed_smem_bytes = 4 * (1 << log_block_size) * (2 * sizeof(Key) + sizeof(Value));
-
-  static const bool value = num_needed_smem_bytes <= num_available_smem_bytes;
-};
-
-
-
-// choose a (log) block_size to use for our kernels
-template<unsigned int log_preferred_block_size, typename Key, typename Value>
-  struct select_log_block_size
-    : thrust::detail::eval_if<
-        is_block_size_valid<log_preferred_block_size, Key, Value>::value,
-        thrust::detail::integral_constant<unsigned int, log_preferred_block_size>,
-        select_log_block_size<log_preferred_block_size - 1, Key, Value>
-      >::type
-{};
-
-
-// don't recurse lower than block_size < 128
-template<typename Key, typename Value>
-  struct select_log_block_size<6, Key, Value>
-{
-  // no block size exists which can satisfy the storage demands
-};
-
-
-template<typename Key, typename Value>
-  struct block_size
-{
-  // prefer block_size == 512, go lower if we need to
-  static const unsigned int value = 1 << select_log_block_size<8, Key, Value>::value;
-};
-
-
-template <typename Size>
-inline unsigned int max_grid_size(Size block_size)
-{
-  const device_properties_t& properties = device_properties();
-
-  const unsigned int max_threads = properties.maxThreadsPerMultiProcessor * properties.multiProcessorCount;
-  const unsigned int max_blocks  = properties.maxGridSize[0];
-  
-  return std::min<unsigned int>(max_blocks, 3 * max_threads / block_size);
-} // end max_grid_size()
-
-
-// Base case for the merge algorithm: merges data where tile_size <= block_size. 
-// Works by loading two or more tiles into shared memory and doing a binary search.
-template<unsigned int block_size,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename RandomAccessIterator3,
-         typename RandomAccessIterator4,
-         typename StrictWeakOrdering,
-         typename Context>
-struct merge_small_tiles_by_key_closure
-{
-  typedef Context context_type;
-
-  RandomAccessIterator1 keys_first;
-  RandomAccessIterator2 values_first;
-  const unsigned int n;
-  const unsigned int log_tile_size;
-  RandomAccessIterator3 keys_result;
-  RandomAccessIterator4 values_result;
-  StrictWeakOrdering comp;
-  context_type context;
-
-  // these members are derivable from block_size, n, and log_tile_size
-  unsigned int index_of_last_block;
-  unsigned int index_of_last_tile_in_last_block;
-  unsigned int size_of_last_tile;
-
-  merge_small_tiles_by_key_closure
-    (RandomAccessIterator1 keys_first,
-     RandomAccessIterator2 values_first,
-     const unsigned int n,
-     const unsigned int log_tile_size,
-     RandomAccessIterator3 keys_result,
-     RandomAccessIterator4 values_result,
-     StrictWeakOrdering comp,
-     Context context = Context())
-    : keys_first(keys_first), values_first(values_first),
-      n(n), 
-      log_tile_size(log_tile_size),
-      keys_result(keys_result), values_result(values_result),
-      comp(comp),
-      context(context)
-  {
-    // compute the number of tiles, including a possible partial tile
-    unsigned int tile_size = 1 << log_tile_size;
-    unsigned int num_tiles = thrust::detail::util::divide_ri(n, tile_size);
-    unsigned int partial_tile_size = n % tile_size;
-
-    // compute the number of logical thread blocks, including a possible partial block
-    unsigned int tiles_per_block = block_size / tile_size;
-    unsigned int num_blocks = thrust::detail::util::divide_ri(num_tiles, tiles_per_block);
-    unsigned int partial_block_size = num_tiles % tiles_per_block;
-
-    // compute the number of tiles in the last block, which might be of partial size
-    unsigned int number_of_tiles_in_last_block = partial_block_size ? partial_block_size : tiles_per_block;
-
-    size_of_last_tile = partial_tile_size ? partial_tile_size : tile_size;
-    index_of_last_tile_in_last_block = number_of_tiles_in_last_block - 1;
-    index_of_last_block = num_blocks - 1;
-  }
-
-  unsigned int grid_size() const
-  {
-    const unsigned int max_num_blocks = max_grid_size(block_size);
-    const unsigned int num_logical_blocks = index_of_last_block + 1;
-    return thrust::min<unsigned int>(num_logical_blocks, max_num_blocks);
-  }
-
-  __device__ __thrust_forceinline__
-  void operator()(void)
-  {
-    typedef typename iterator_value<RandomAccessIterator3>::type KeyType;
-    typedef typename iterator_value<RandomAccessIterator4>::type ValueType;
-
-    // load (2*block_size) elements into shared memory. These (2*block_size) elements belong to (2*block_size)/tile_size different tiles.
-    __shared__ uninitialized_array<KeyType, 2 * block_size>   key;
-    __shared__ uninitialized_array<KeyType, 2 * block_size>   outkey;
-    __shared__ uninitialized_array<ValueType, 2 * block_size> outvalue;
-
-    const unsigned int grid_size = context.grid_dimension() * context.block_dimension();
-
-    unsigned int block_idx = context.block_index();
-    
-    // the global index of this task
-    unsigned int i = context.thread_index() + context.block_index() * context.block_dimension();
-
-    // advance iterators
-    keys_first    += i;
-    values_first  += i;
-    keys_result   += i;
-    values_result += i;
-
-    for(;
-        block_idx <= index_of_last_block;
-        block_idx += context.grid_dimension(), i += grid_size, keys_first += grid_size, values_first += grid_size, keys_result += grid_size, values_result += grid_size)
-    {
-      KeyType my_key;
-      
-      // copy over inputs to shared memory
-      if(i < n)
-      {
-        key[context.thread_index()] = my_key = *keys_first;
-      } // end if
-      
-      // the tile to which the element belongs
-      unsigned int tile_index = context.thread_index()>>log_tile_size;
-
-      // figure out the index and size of the other tile
-      unsigned int other_tile_index = tile_index^1;
-      unsigned int other_tile_size = (1<<log_tile_size);
-
-      // if the other tile is the final tile, it is potentially
-      // smaller than the rest
-      if(block_idx == index_of_last_block
-         && other_tile_index == index_of_last_tile_in_last_block)
-      {
-        other_tile_size = size_of_last_tile;
-      } // end if
-      
-      // figure out where the other tile begins in shared memory
-      KeyType *other = key.data() + (other_tile_index<<log_tile_size);
-
-      context.barrier();
-      if(i < n)
-      {
-        // to compute the rank of my element in the merged sequence
-        // add the rank of the element in the other tile
-        // plus the rank of the element in this tile
-        // the computation for the rank of the element in this tile 
-        // differs depending on if we're in the odd or even tile
-        unsigned int rank;
-        if(tile_index & 1)
-        {
-          rank = thrust::system::detail::generic::scalar::upper_bound_n(other, other_tile_size, my_key, comp) - other;
-          rank += context.thread_index() - (1<<log_tile_size);
-        }
-        else
-        {
-          rank = thrust::system::detail::generic::scalar::lower_bound_n(other, other_tile_size, my_key, comp) - other;
-          rank += context.thread_index();
-        }
-
-        // store my key and value to the output arrays in smem
-        outkey[rank] = my_key;
-        outvalue[rank] = *values_first;
-      } // end if
-      context.barrier();
-      
-      if(i < n)
-      {
-        // coalesced writes to global memory
-        *keys_result   = outkey[context.thread_index()];
-        *values_result = outvalue[context.thread_index()];
-      } // end if
-      context.barrier();
-    } // end for
-  } // end operator()
-}; // merge_small_tiles_by_key_closure
-
-
-template<unsigned int stride>
-  class static_strided_integer_range
-{
-  // XXX cudafe doesn't like this private for some reason
-  //private:
-  public:
-    typedef typename thrust::counting_iterator<unsigned int> counting_iterator;
-
-    struct stride_functor
-      : public thrust::unary_function<unsigned int,unsigned int>
-    {
-      inline __host__ __device__
-      unsigned int operator()(unsigned int i) const
-      {
-        return stride * i;
-      }
-    };
-
-  public:
-    typedef typename thrust::transform_iterator<stride_functor, counting_iterator> iterator;
-
-    static_strided_integer_range(unsigned int num_strides)
-      : m_begin(iterator(counting_iterator(0), stride_functor())),
-        m_end(iterator(counting_iterator(num_strides), stride_functor()))
-    {}
-
-    iterator begin() const
-    {
-      return m_begin;
-    }
-
-    iterator end() const
-    {
-      return m_end;
-    }
-
-  private:
-    iterator m_begin, m_end;
-};
-
-
-///////////////////// Find the rank of each extracted element in both arrays ////////////////////////////////////////
-///////////////////// This breaks up the array into independent segments to merge ////////////////////////////////////////
-// Inputs: d_splitters, d_splittes_pos: the merged array of splitters with corresponding positions.
-//		   d_srcData: input data, datasize: number of entries in d_srcData
-//		   N_SPLITTERS the number of splitters, log_blocksize: log of the size of each block of sorted data
-//		   log_num_merged_splitters_per_tile = log of the number of merged splitters. ( = log_blocksize - 7). 
-// Output: d_rank1, d_rank2: ranks of each splitter in d_splitters in the block to which it belongs
-//		   (say i) and its corresponding block (block i+1).
-template<unsigned int block_size,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename RandomAccessIterator3,
-         typename RandomAccessIterator4,
-         typename StrictWeakOrdering,
-         typename Context>
-struct rank_splitters_closure
-{
-  typedef Context context_type;
-
-  static const unsigned int log_block_size = thrust::detail::mpl::math::log2<block_size>::value;
-
-  RandomAccessIterator1 splitters_first;
-  RandomAccessIterator2 splitters_pos_first;
-  RandomAccessIterator3 keys_first;
-  RandomAccessIterator4 ranks_result1;
-  RandomAccessIterator4 ranks_result2;
-  unsigned int num_splitters;
-  unsigned int num_keys;
-  unsigned int log_tile_size;
-  thrust::detail::device_function<
-    StrictWeakOrdering,
-    bool
-  > comp;
-  context_type context;
-
-  // this member is derivable from those received in the constructor
-  unsigned int log_num_merged_splitters_per_tile;
-
-  rank_splitters_closure(RandomAccessIterator1 splitters_first,
-                         RandomAccessIterator2 splitters_pos_first, 
-                         RandomAccessIterator3 keys_first,
-                         unsigned int num_splitters,
-                         unsigned int num_keys, 
-                         unsigned int log_tile_size, 
-                         RandomAccessIterator4 ranks_result1,
-                         RandomAccessIterator4 ranks_result2, 
-                         StrictWeakOrdering comp,
-                         context_type context = context_type())
-    : splitters_first(splitters_first), splitters_pos_first(splitters_pos_first),
-      keys_first(keys_first),
-      ranks_result1(ranks_result1), ranks_result2(ranks_result2),
-      num_splitters(num_splitters), num_keys(num_keys),
-      log_tile_size(log_tile_size),
-      comp(comp), context(context)
-  {
-    // the number of splitters in each tile before merging
-    const unsigned int log_num_splitters_per_tile = log_tile_size - log_block_size;
-
-    // the number of splitters in each merged tile
-    log_num_merged_splitters_per_tile = log_num_splitters_per_tile + 1;
-  }
-
-  inline unsigned int grid_size() const
-  {
-    unsigned int num_blocks = num_splitters / block_size;
-    if(num_splitters % block_size) ++num_blocks;
-
-    // compute the maximum number of block_size we can launch on this arch
-    const unsigned int max_num_blocks = max_grid_size(block_size);
-
-    return min<unsigned int>(num_blocks, max_num_blocks);
-  }
-
-  /*! this member function returns the index of the (odd,even) block pair
-   *  that the splitter of interest belongs to
-   *  \param splitter_idx The index of the splitter in the splitters list
-   *  \return The global index of the (odd,even) block pair
-   */
-  __device__ __thrust_forceinline__
-  unsigned int block_pair_idx(unsigned int splitter_idx) const
-  {
-    return splitter_idx >> log_num_merged_splitters_per_tile;
-  }
-
-  /*! This member function returns the end of the search range in the other tile in
-   *  which the splitter of interest needs to be ranked.
-   *  \param splitter_idx The index of the splitter in the splitters array
-   *  \param splitter_global_idx The index of the splitter in the global array of elements
-   *  \param tile_idx The index of the tile to which the splitter belongs.
-   *  \return The half-open interval in the other tile in which the splitter needs to be ranked.
-   *          [first_index_to_search, size_of_interval)
-   */
-  __device__ __thrust_forceinline__
-  thrust::pair<unsigned int,unsigned int> search_interval(unsigned int splitter_idx, unsigned int splitter_global_idx, unsigned int tile_idx) const
-  {
-    // We want to compute the ranks of the splitter in d_srcData1 and d_srcData2
-    // for instance, if the splitter belongs to d_srcData1, then 
-    // (1) the rank in d_srcData1 is simply given by its splitter_global_idx
-    // (2) to find the rank in d_srcData2, we first find the block in d_srcData2 where inp appears.
-    //     We do this by noting that we have already merged/sorted splitters, and thus the rank
-    //     of inp in the elements of d_srcData2 that are present in splitters is given by 
-    //        position of inp in d_splitters - rank of inp in elements of d_srcData1 in splitters
-    //        = i - splitter_global_idx
-    //     This also gives us the block of d_srcData2 that the splitter belongs in, since we have one
-    //     element in splitters per block of d_srcData2.
-    
-    //     We now perform a binary search over this block of d_srcData2 to find the rank of inp in d_srcData2.
-    //     start and end are the start and end indices of this block in d_srcData2, forming the bounds of the binary search.
-    //     Note that this binary search is in global memory with uncoalesced loads. However, we only find the ranks 
-    //     of a small set of elements, one per splitter: thus it is not the performance bottleneck.
-    
-    // the local index of the splitter within the (odd, even) block pair.
-    const unsigned int splitter_block_pair_idx = splitter_idx - (block_pair_idx(splitter_idx)<<log_num_merged_splitters_per_tile);
-
-    // the index of the splitter within its tile
-    const unsigned int splitter_tile_idx = splitter_global_idx - (tile_idx<<log_tile_size);
-
-    // the index of the splitter's block within its tile
-    const unsigned int block_tile_idx = splitter_tile_idx >> log_block_size;
-    
-    // find the end of the search range in the other tile
-    unsigned int end = (( splitter_block_pair_idx - block_tile_idx) << log_block_size);
-
-    // begin by assuming the search range is the size of a full block
-    unsigned int other_block_size = block_size;
-
-    // the index of the other tile can be found with
-    const unsigned int other_tile_idx = tile_idx ^ 1;
-    
-    // the size of the other tile can be less than tile_size if the it is the last tile.
-    unsigned int other_tile_size = min<unsigned int>(1 << log_tile_size, num_keys - (other_tile_idx<<log_tile_size));
-
-    if(end > other_tile_size)
-    {
-      // the other block has partial size
-      end = other_tile_size;
-      other_block_size = num_keys % block_size;
-    }
-    else if(end == 0)
-    {
-      // when the search range is empty
-      // the other_block_size is 0
-      other_block_size = 0;
-    }
-
-    // the search range begins other_block_size elements before the end
-    unsigned int start = end - other_block_size;
-
-    return thrust::make_pair(start,other_block_size);
-  }
-
-  __device__ __thrust_forceinline__
-  void operator()(void)
-  {
-    typedef typename iterator_value<RandomAccessIterator1>::type KeyType;
-    typedef typename iterator_value<RandomAccessIterator2>::type IndexType;
-  
-    const unsigned int grid_size = context.grid_dimension() * context.block_dimension();
-  
-    unsigned int splitter_idx = context.thread_index() + context.block_index() * context.block_dimension();
-  
-    // advance iterators
-    splitters_first     += splitter_idx;
-    splitters_pos_first += splitter_idx;
-    ranks_result1       += splitter_idx;
-    ranks_result2       += splitter_idx;
-    
-    for(;
-        splitter_idx < num_splitters;
-        splitter_idx += grid_size, splitters_first += grid_size, splitters_pos_first += grid_size, ranks_result1 += grid_size, ranks_result2 += grid_size)
-    {
-      // the index of the splitter within the global array of elements
-      IndexType splitter_global_idx = *splitters_pos_first;
-
-      // the tile to which the splitter belongs.
-      unsigned int tile_idx = (splitter_global_idx >> log_tile_size);
-      
-      // the index of the "other" tile which which tile_idx must be merged.
-      unsigned int other_tile_idx = tile_idx^1;
-
-      // compute the interval in the other tile to search
-      unsigned int start, n;
-      thrust::tie(start,n) = search_interval(splitter_idx, splitter_global_idx, tile_idx);
-
-      // point to the beginning of the other tile
-      RandomAccessIterator3 other_tile_begin = keys_first + (other_tile_idx<<log_tile_size);
-
-      // offset the pointer to the other tile by the search range's offset
-      RandomAccessIterator3 search_range_begin = other_tile_begin + start;
-      
-      // find the rank of our splitter in the other tile
-      KeyType splitter = *splitters_first;
-
-      // the index of the splitter within its tile
-      // this is one of the output ranks
-      const unsigned int splitter_tile_idx = splitter_global_idx - (tile_idx<<log_tile_size);
-
-      // branch depending on whether or not our splitter is in the odd tile
-      if(tile_idx & 1)
-      {
-        unsigned int result = thrust::system::detail::generic::scalar::upper_bound_n(search_range_begin, n, splitter, comp) - search_range_begin;
-
-        *ranks_result1 = start + result;
-
-        *ranks_result2 = splitter_tile_idx;
-      } // end if
-      else
-      {
-        unsigned int result = thrust::system::detail::generic::scalar::lower_bound_n(search_range_begin, n, splitter, comp) - search_range_begin;
-
-        *ranks_result1 = splitter_tile_idx;
-
-        *ranks_result2 = start + result;
-      } // end else
-    } // end for
-  } // end operator()
-}; // rank_splitters_closure
-
-
-template<unsigned int block_size,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename RandomAccessIterator3,
-         typename RandomAccessIterator4,
-         typename StrictWeakOrdering>
-  void rank_splitters(RandomAccessIterator1 splitters_first,
-                      RandomAccessIterator1 splitters_last,
-                      RandomAccessIterator2 splitter_positions_first,
-                      RandomAccessIterator3 keys_first,
-                      RandomAccessIterator3 keys_last,
-                      size_t log_tile_size,
-                      RandomAccessIterator4 ranks_result1,
-                      RandomAccessIterator4 ranks_result2,
-                      StrictWeakOrdering comp)
-{
-  typedef rank_splitters_closure<
-    block_size,
-    RandomAccessIterator1,
-    RandomAccessIterator2,
-    RandomAccessIterator3,
-    RandomAccessIterator4,
-    StrictWeakOrdering,
-    detail::statically_blocked_thread_array<block_size>
-  > Closure;
-
-  Closure closure(splitters_first,
-                  splitter_positions_first,
-                  keys_first,
-                  splitters_last - splitters_first,
-                  keys_last - keys_first,
-                  log_tile_size,
-                  ranks_result1,
-                  ranks_result2,
-                  comp);
-
-  detail::launch_closure(closure, closure.grid_size(), block_size);
-}
-
-
-template<typename Context,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename Size,
-         typename RandomAccessIterator3,
-         typename RandomAccessIterator4>
-__device__
-  void copy_n(Context context,
-              RandomAccessIterator1 first1,
-              RandomAccessIterator2 first2,
-              Size n,
-              RandomAccessIterator3 result1,
-              RandomAccessIterator4 result2)
-{
-  for(Size i = context.thread_index();
-      i < n;
-      i += context.block_dimension())
-  {
-    result1[i] = first1[i];
-    result2[i] = first2[i];
-  }
-}
-
-
-///////////////////// MERGE TWO INDEPENDENT SEGMENTS USING BINARY SEARCH IN SHARED MEMORY ////////////////////////////////////////
-// NOTE: This is the most compute-intensive part of the algorithm. 
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-// Thread block i merges entries between rank[i] and rank[i+1]. These can be independently
-// merged and concatenated, as noted above. 
-// Each thread in the thread block i does a binary search of one element between rank[i] -> rank[i+1] in the 
-// other array. 
-
-// Inputs: srcdatakey, value: inputs
-//         log_blocksize, log_num_merged_splitters_per_tile: as in previous functions
-// Outputs: resultdatakey, resultdatavalue: output merged arrays are written here.
-template<unsigned int block_size,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename RandomAccessIterator3,
-         typename RandomAccessIterator4,
-         typename RandomAccessIterator5,
-         typename RandomAccessIterator6,
-         typename StrictWeakOrdering,
-         typename Context>
-struct merge_subtiles_by_key_closure
-{	
-  typedef Context context_type;
-  static const unsigned int log_block_size = thrust::detail::mpl::math::log2<block_size>::value;
-
-  RandomAccessIterator1 keys_first;
-  RandomAccessIterator2 values_first;
-  unsigned int n;
-  RandomAccessIterator3 ranks_first1;
-  RandomAccessIterator4 ranks_first2; 
-  const unsigned int tile_size;
-  const unsigned int num_splitters;
-  RandomAccessIterator5 keys_result;
-  RandomAccessIterator6 values_result;
-  StrictWeakOrdering comp;
-  Context context;
-
-  // this member is derivable from the constructor parameters
-  unsigned int log_num_merged_splitters_per_tile;
-
-  merge_subtiles_by_key_closure
-    (RandomAccessIterator1 keys_first,
-     RandomAccessIterator2 values_first,
-     unsigned int n, 
-     RandomAccessIterator3 ranks_first1,
-     RandomAccessIterator4 ranks_first2, 
-     const unsigned int log_tile_size, 
-     const unsigned int num_splitters,
-     RandomAccessIterator5 keys_result,
-     RandomAccessIterator6 values_result,
-     StrictWeakOrdering comp,
-     Context context = Context())
-    : keys_first(keys_first), values_first(values_first), n(n),
-      ranks_first1(ranks_first1), ranks_first2(ranks_first2),
-      tile_size(1 << log_tile_size),
-      num_splitters(num_splitters),
-      keys_result(keys_result), values_result(values_result),
-      comp(comp), context(context)
-  {
-    const unsigned int log_num_splitters_per_tile = log_tile_size - log_block_size;
-    log_num_merged_splitters_per_tile = log_num_splitters_per_tile + 1;
-  }
-
-  unsigned int grid_size() const
-  {
-    const unsigned int max_num_blocks = max_grid_size(block_size);
-    return thrust::min<unsigned int>(num_splitters, max_num_blocks);
-  }
-
-  __device__ __thrust_forceinline__
-  unsigned int even_offset(unsigned int oddeven_blockid) const
-  {
-    return oddeven_blockid << (log_num_merged_splitters_per_tile + log_block_size);
-  }
-
-  __device__ __thrust_forceinline__
-  void get_partition(unsigned int partition_idx, unsigned int oddeven_blockid,
-                     unsigned int &rank1, unsigned int &size1,
-                     unsigned int &rank2, unsigned int &size2) const
-  {
-    // XXX this logic would be much improved if we were guaranteed that there was 
-    //     an element at ranks_first[1]
-    // XXX we could eliminate the need for local_blockIdx, log_num_merged_splitters_per_block, tile_size, and n
-    
-    // the index of the merged splitter within the splitters for the odd-even block pair.
-    unsigned int local_blockIdx = partition_idx - (oddeven_blockid<<log_num_merged_splitters_per_tile);
-
-    rank1 = *ranks_first1;
-    rank2 = *ranks_first2;
-  
-    // get the rank of the next splitter if we aren't processing the very last splitter of a partially full tile
-    // or if we aren't processing the last splitter in our tile
-    if((partition_idx == num_splitters - 1) || (local_blockIdx == ((1<<log_num_merged_splitters_per_tile)-1)))
-    {
-      // we're at the end
-      size1 = size2 = tile_size;
-    } // end if
-    else
-    {
-      // dereference the rank of the *next* splitter
-      size1 = ranks_first1[1];
-      size2 = ranks_first2[1];
-    } // end else
-    
-    // Adjust size2 to account for the last block possibly not being full.
-    // check if size2 would fall off the end of the array
-    if((even_offset(oddeven_blockid) + tile_size + size2) > n)
-    {
-      size2 = n - tile_size - even_offset(oddeven_blockid);
-    } // end if
-  
-    // measure each array relative to its beginning
-    size1 -= rank1;
-    size2 -= rank2;
-  }
-
-  template<typename KeyType, typename ValueType>
-  __device__ __thrust_forceinline__
-  void do_it(KeyType *s_keys, ValueType *s_values)
-  {
-    // advance iterators
-    unsigned int i = context.block_index();
-    ranks_first1 += i;
-    ranks_first2 += i;
-    
-    // Thread Block i merges the sub-block associated with splitter i: rank[i] -> rank[i+1] in a particular odd-even block pair.
-    for(;
-        i < num_splitters;
-        i += context.grid_dimension(), ranks_first1 += context.grid_dimension(), ranks_first2 += context.grid_dimension())
-    {
-      // the (odd, even) block pair that the splitter belongs to.
-      unsigned int oddeven_blockid = i >> log_num_merged_splitters_per_tile;
-      
-      // start1 & start2 store rank[i] and rank[i+1] indices in arrays 1 and 2.
-      // size1 & size2 store the number of of elements between rank[i] & rank[i+1] in arrays 1 & 2.
-      unsigned int rank1, rank2, size1, size2;
-      get_partition(i, oddeven_blockid, rank1, size1, rank2, size2);
-  
-      // find where the odd,even arrays begin
-      RandomAccessIterator1 even_keys_first = keys_first + even_offset(oddeven_blockid);
-      RandomAccessIterator1 odd_keys_first  = even_keys_first + tile_size;
-  
-      RandomAccessIterator2 even_values_first = values_first + even_offset(oddeven_blockid);
-      RandomAccessIterator2 odd_values_first  = even_values_first + tile_size;
-      
-      // load tiles into smem
-      copy_n(context, even_keys_first + rank1, even_values_first + rank1, size1, s_keys, s_values);
-      copy_n(context, odd_keys_first  + rank2, odd_values_first  + rank2, size2, s_keys + size1, s_values + size1);
-
-      context.barrier();
-  
-      // merge the arrays in-place
-      block::inplace_merge_by_key_n(context, s_keys, s_values, size1, size2, comp);
-
-      context.barrier();
-      
-      // write tiles to gmem
-      unsigned int dst_offset = even_offset(oddeven_blockid) + rank1 + rank2;
-      copy_n(context, s_keys, s_values, size1 + size2, keys_result + dst_offset, values_result + dst_offset);
-
-      context.barrier();
-    } // end for i
-  }
-
-  __device__ __thrust_forceinline__
-  void operator()(void)
-  {
-    typedef typename iterator_value<RandomAccessIterator5>::type KeyType;
-    typedef typename iterator_value<RandomAccessIterator6>::type ValueType;
-  
-    __shared__ uninitialized_array<KeyType,   2 * block_size> s_keys;
-    __shared__ uninitialized_array<ValueType, 2 * block_size> s_values;
-  
-    do_it(s_keys.data(), s_values.data());
-  }
-}; // merge_subtiles_by_key_closure
-
-// merge_subtiles_by_key() merges each sub-tile independently. As explained in rank_splitters(), 
-// the sub-tiles are defined by the ranks of the splitter elements d_rank1 and d_rank2 in the odd and even tiles resp.
-// It can be easily shown that each sub-tile cannot contain more than block_size elements of either the odd or even tile.
-
-// the function calls merge_subblocks_binarysearch_kernel() for the remaining N_splitterS sub-tiles
-// We use 1 thread block per splitter: For instance, thread block 0 will merge rank1[0] -> rank1[1] of array i with
-// rank2[0] -> rank2[1] of array i^1, with i being the thread block to which the splitter belongs.
-
-// We implement each sub-tile merge using a binary search. We compute the rank of each element belonging to a sub-tile 
-// of an odd numbered tile in the corresponding sub-tile of its even numbered pair. It then adds this rank to 
-// the index of the element in its own sub-tile to find the output index of the element in the merged sub-tile.
-
-template<typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename RandomAccessIterator3,
-         typename RandomAccessIterator4,
-         typename RandomAccessIterator5,
-         typename RandomAccessIterator6,
-         typename RandomAccessIterator7,
-         typename StrictWeakOrdering>
-  void merge_subtiles_by_key(RandomAccessIterator1 keys_first,
-                             RandomAccessIterator1 keys_last,
-                             RandomAccessIterator2 values_first,
-                             RandomAccessIterator3 splitters_pos_first, 
-                             RandomAccessIterator3 splitters_pos_last,
-                             RandomAccessIterator4 ranks_first1,
-                             RandomAccessIterator5 ranks_first2, 
-                             RandomAccessIterator6 keys_result,
-                             RandomAccessIterator7 values_result, 
-                             unsigned int log_tile_size, 
-                             StrictWeakOrdering comp)
-{
-  typedef typename iterator_value<RandomAccessIterator6>::type KeyType;
-  typedef typename iterator_value<RandomAccessIterator7>::type ValueType;
-
-  const unsigned int block_size = stable_merge_sort_detail::block_size<KeyType,ValueType>::value;
-
-  typedef merge_subtiles_by_key_closure<
-    block_size,
-    RandomAccessIterator1,
-    RandomAccessIterator2,
-    RandomAccessIterator4,
-    RandomAccessIterator5,
-    RandomAccessIterator6,
-    RandomAccessIterator7,
-    StrictWeakOrdering,
-    detail::statically_blocked_thread_array<block_size>
-  > Closure;
-
-  Closure closure(keys_first,
-                  values_first,
-                  keys_last - keys_first, 
-                  ranks_first1,
-                  ranks_first2, 
-                  log_tile_size,
-                  splitters_pos_last - splitters_pos_first,
-  	          keys_result,
-                  values_result,
-                  comp);
-
-  detail::launch_closure(closure, closure.grid_size(), block_size);
-}
-
-
-template<unsigned int block_size,
-         typename DerivedPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename RandomAccessIterator3,
-         typename RandomAccessIterator4,
-         typename StrictWeakOrdering>
-  void merge_small_tiles_by_key(execution_policy<DerivedPolicy> &,
-                                RandomAccessIterator1 keys_first,
-                                RandomAccessIterator1 keys_last,
-                                RandomAccessIterator2 values_first,
-                                size_t log_tile_size,
-                                RandomAccessIterator3 keys_result,
-                                RandomAccessIterator4 values_result,
-                                StrictWeakOrdering comp)
-{
-  typedef merge_small_tiles_by_key_closure<
-    block_size,
-    RandomAccessIterator1,
-    RandomAccessIterator2,
-    RandomAccessIterator3,
-    RandomAccessIterator4,
-    StrictWeakOrdering,
-    detail::statically_blocked_thread_array<block_size>
-  > Closure;
-
-  Closure closure(keys_first, values_first, keys_last - keys_first, log_tile_size, keys_result, values_result, comp);
-
-  detail::launch_closure(closure, closure.grid_size(), block_size);
-} // end merge_small_tiles_by_key()
-
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename RandomAccessIterator3,
-         typename RandomAccessIterator4,
-         typename StrictWeakOrdering>
-  void merge_tiles_by_key_recursive(execution_policy<DerivedPolicy> &exec,
-                                    RandomAccessIterator1 keys_first,
-                                    RandomAccessIterator1 keys_last,
-                                    RandomAccessIterator2 values_first,
-                                    RandomAccessIterator3 keys_result,
-                                    RandomAccessIterator4 values_result,
-                                    size_t log_tile_size,
-                                    StrictWeakOrdering comp)
-{
-  typedef typename iterator_value<RandomAccessIterator3>::type KeyType;
-  typedef typename iterator_value<RandomAccessIterator4>::type ValueType;
-
-  const size_t tile_size = 1<<log_tile_size;
-
-  // Compute the block_size based on the types to sort
-  const unsigned int block_size = stable_merge_sort_detail::block_size<KeyType,ValueType>::value;
-
-  // Case (a): tile_size <= block_size
-  if(tile_size <= block_size)
-  {
-    return merge_small_tiles_by_key<2*block_size>(exec, keys_first, keys_last, values_first, log_tile_size, keys_result, values_result, comp);
-  } // end if
-
-  // Case (b) tile_size >= block_size
-
-  // step 1 of the recursive case: gather one splitter per block_size entries in each odd-even tile pair.
-  thrust::detail::temporary_array<KeyType, DerivedPolicy> splitters(exec, thrust::detail::util::divide_ri(keys_last - keys_first, block_size));
-  static_strided_integer_range<block_size>                splitters_pos(splitters.size());
-  thrust::gather(exec, splitters_pos.begin(), splitters_pos.end(), keys_first, splitters.begin());
-                            
-  // step 2 of the recursive case: merge the splitters & their positions
-  thrust::detail::temporary_array<KeyType,      DerivedPolicy> merged_splitters(exec, splitters.size());
-  thrust::detail::temporary_array<unsigned int, DerivedPolicy> merged_splitters_pos(exec, splitters.size());
-
-  const unsigned int log_block_size = thrust::detail::mpl::math::log2<block_size>::value;
-  size_t log_num_splitters_per_tile = log_tile_size - log_block_size;
-  merge_tiles_by_key_recursive(exec,
-                               splitters.begin(),
-                               splitters.end(),
-                               splitters_pos.begin(),
-                               merged_splitters.begin(),
-                               merged_splitters_pos.begin(),
-                               log_num_splitters_per_tile,
-                               comp);
-
-  // step 3 of the recursive case: find the ranks of each splitter in the respective two tiles.
-  // reuse the merged_splitters_pos storage
-  thrust::detail::temporary_array<unsigned int, DerivedPolicy> &rank1 = merged_splitters_pos;
-  thrust::detail::temporary_array<unsigned int, DerivedPolicy> rank2(exec, rank1.size());
-
-  rank_splitters<block_size>(merged_splitters.begin(),
-                             merged_splitters.end(),
-                             merged_splitters_pos.begin(),
-                             keys_first,
-                             keys_last,
-                             log_tile_size,
-                             rank1.begin(),
-                             rank2.begin(),
-                             comp);
-
-  // step 4 of the recursive case: merge each sub-tile independently in parallel.
-  merge_subtiles_by_key(keys_first,
-                        keys_last,
-                        values_first,
-                        merged_splitters_pos.begin(),
-                        merged_splitters_pos.end(),
-                        rank1.begin(),
-                        rank2.begin(),
-                        keys_result,
-                        values_result,
-                        log_tile_size,
-                        comp);
-}
-
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename RandomAccessIterator3,
-         typename RandomAccessIterator4,
-         typename StrictWeakOrdering>
-  void merge_tiles_by_key(execution_policy<DerivedPolicy> &exec,
-                          RandomAccessIterator1 keys_first,
-                          RandomAccessIterator2 values_first,
-                          size_t n,
-                          RandomAccessIterator3 keys_result,
-                          RandomAccessIterator4 values_result,
-                          unsigned int log_tile_size,
-                          StrictWeakOrdering comp)
-{
-  const unsigned int tile_size = 1 << log_tile_size;
-  const size_t num_tiles = thrust::detail::util::divide_ri(n, tile_size);
-
-  // if there is an odd number of tiles, we should exclude the last one
-  // without a twin in merge_recursive
-  const size_t last_tile_offset = (num_tiles%2)?((num_tiles-1)*tile_size):n;
-
-  merge_tiles_by_key_recursive(exec,
-                               keys_first,
-                               keys_first + last_tile_offset,
-                               values_first,
-                               keys_result,
-                               values_result,
-                               log_tile_size,
-                               comp);
-
-  // copy the last tile without a twin, should it exist
-  if(last_tile_offset < n)
-  {
-    thrust::copy(exec, keys_first + last_tile_offset, keys_first + n, keys_result + last_tile_offset);
-    thrust::copy(exec, values_first + last_tile_offset, values_first + n, values_result + last_tile_offset);
-  } // end if
-} // end merge_tiles_by_key()
-
-
-} // end stable_merge_sort_detail
-
-
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator,
-         typename StrictWeakOrdering>
-void stable_merge_sort(execution_policy<DerivedPolicy> &exec,
-                       RandomAccessIterator first,
-                       RandomAccessIterator last,
-                       StrictWeakOrdering comp)
-{
-  // XXX it's potentially unsafe to pass the same array for keys & values
-  thrust::system::cuda::detail::detail::stable_merge_sort_by_key(exec, first, last, first, comp);
-}
-
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename StrictWeakOrdering>
-  void stable_merge_sort_by_key(execution_policy<DerivedPolicy> &exec,
-                                RandomAccessIterator1 keys_first,
-                                RandomAccessIterator1 keys_last,
-                                RandomAccessIterator2 values_first,
-                                StrictWeakOrdering comp)
-{
-  typedef typename thrust::iterator_traits<RandomAccessIterator1>::value_type KeyType;
-  typedef typename thrust::iterator_traits<RandomAccessIterator2>::value_type ValueType;
-
-  // compute the block_size based on the types we're sorting
-  const unsigned int block_size = stable_merge_sort_detail::block_size<KeyType,ValueType>::value;
-
-  // XXX WAR unused variable warning issued by nvcc
-  (void) block_size;
-
-  // first, sort each tile of block_size elements
-  stable_sort_by_count<block_size>(exec, keys_first, keys_last, values_first, comp);
-
-  // merge tiles if there is more than one
-  const size_t n = keys_last - keys_first;
-  if(n > block_size)
-  {
-    // allocate scratch space
-    using namespace thrust::detail;
-    using namespace stable_merge_sort_detail;
-    temporary_array<KeyType,   DerivedPolicy> temp_keys(exec, n);
-    temporary_array<ValueType, DerivedPolicy> temp_values(exec, n);
-
-    // use a caching allocator for the calls to merge_tiles_by_key
-    // XXX unfortunately g++-4.2 can't deal with this special execution policy
-#if defined(THRUST_GCC_VERSION) && THRUST_GCC_VERSION < 40300
-    execution_policy<DerivedPolicy> &merge_exec = exec;
-#else
-    cached_temporary_allocator<DerivedPolicy,thrust::cuda::execution_policy> merge_exec(exec);
-#endif
-
-    // The log(n) iterations start here. Each call to 'merge' merges an odd-even pair of tiles
-    unsigned int log_tile_size = thrust::detail::mpl::math::log2<block_size>::value;
-    bool ping = true;
-    for(; (1u << log_tile_size) < n; ++log_tile_size, ping = !ping)
-    {
-      // we ping-pong back and forth
-      if(ping)
-      {
-        merge_tiles_by_key(merge_exec, keys_first, values_first, n, temp_keys.begin(), temp_values.begin(), log_tile_size, comp);
-      } // end if
-      else
-      {
-        merge_tiles_by_key(merge_exec, temp_keys.begin(), temp_values.begin(), n, keys_first, values_first, log_tile_size, comp);
-      } // end else
-    } // end for
-
-    // this is to make sure that our data is finally in the data and keys arrays
-    // and not in the temporary arrays
-    if(!ping)
-    {
-      thrust::copy(exec, temp_keys.begin(), temp_keys.end(), keys_first);
-      thrust::copy(exec, temp_values.begin(), temp_values.end(), values_first);
-    } // end if
-  } // end if
-} // end stable_merge_sort_by_key()
-
-
-} // end namespace detail
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
-__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_END
-
diff --git a/compat/thrust/system/cuda/detail/detail/stable_primitive_sort.h b/compat/thrust/system/cuda/detail/detail/stable_primitive_sort.h
deleted file mode 100644
index 8449a17b0d..0000000000
--- a/compat/thrust/system/cuda/detail/detail/stable_primitive_sort.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cuda/detail/execution_policy.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-namespace detail
-{
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator>
-void stable_primitive_sort(execution_policy<DerivedPolicy> &exec,
-                           RandomAccessIterator first,
-                           RandomAccessIterator last);
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2>
-void stable_primitive_sort_by_key(execution_policy<DerivedPolicy> &exec,
-                                  RandomAccessIterator1 keys_first,
-                                  RandomAccessIterator1 keys_last,
-                                  RandomAccessIterator2 values_first);
-
-} // end namespace detail
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/cuda/detail/detail/stable_primitive_sort.inl>
-
diff --git a/compat/thrust/system/cuda/detail/detail/stable_primitive_sort.inl b/compat/thrust/system/cuda/detail/detail/stable_primitive_sort.inl
deleted file mode 100644
index d6f4c775b6..0000000000
--- a/compat/thrust/system/cuda/detail/detail/stable_primitive_sort.inl
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cuda/detail/detail/stable_primitive_sort.h>
-#include <thrust/system/cuda/detail/detail/stable_radix_sort.h>
-#include <thrust/functional.h>
-#include <thrust/partition.h>
-#include <thrust/iterator/zip_iterator.h>
-#include <thrust/detail/type_traits.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-namespace detail
-{
-namespace stable_primitive_sort_detail
-{
-
-
-template<typename Iterator>
-  struct enable_if_bool_sort
-    : thrust::detail::enable_if<
-        thrust::detail::is_same<
-          bool,
-          typename thrust::iterator_value<Iterator>::type
-        >::value
-      >
-{};
-
-
-template<typename Iterator>
-  struct disable_if_bool_sort
-    : thrust::detail::disable_if<
-        thrust::detail::is_same<
-          bool,
-          typename thrust::iterator_value<Iterator>::type
-        >::value
-      >
-{};
-
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator>
-  typename enable_if_bool_sort<RandomAccessIterator>::type
-    stable_primitive_sort(execution_policy<DerivedPolicy> &exec,
-                          RandomAccessIterator first,
-                          RandomAccessIterator last)
-{
-  // use stable_partition if we're sorting bool
-  // stable_partition puts true values first, so we need to logical_not
-  thrust::stable_partition(exec, first, last, thrust::logical_not<bool>());
-}
-
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator>
-  typename disable_if_bool_sort<RandomAccessIterator>::type
-    stable_primitive_sort(execution_policy<DerivedPolicy> &exec,
-                          RandomAccessIterator first,
-                          RandomAccessIterator last)
-{
-  // call stable_radix_sort
-  thrust::system::cuda::detail::detail::stable_radix_sort(exec,first,last);
-}
-
-
-struct logical_not_first
-{
-  template<typename Tuple>
-  __host__ __device__
-  bool operator()(Tuple t)
-  {
-    return !thrust::get<0>(t);
-  }
-};
-
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2>
-  typename enable_if_bool_sort<RandomAccessIterator1>::type
-    stable_primitive_sort_by_key(execution_policy<DerivedPolicy> &exec,
-                                 RandomAccessIterator1 keys_first,
-                                 RandomAccessIterator1 keys_last,
-                                 RandomAccessIterator2 values_first)
-{
-  // use stable_partition if we're sorting bool
-  // stable_partition puts true values first, so we need to logical_not
-  thrust::stable_partition(exec,
-                           thrust::make_zip_iterator(thrust::make_tuple(keys_first, values_first)),
-                           thrust::make_zip_iterator(thrust::make_tuple(keys_last, values_first)),
-                           logical_not_first());
-}
-
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2>
-  typename disable_if_bool_sort<RandomAccessIterator1>::type
-    stable_primitive_sort_by_key(execution_policy<DerivedPolicy> &exec,
-                                 RandomAccessIterator1 keys_first,
-                                 RandomAccessIterator1 keys_last,
-                                 RandomAccessIterator2 values_first)
-{
-  // call stable_radix_sort_by_key
-  thrust::system::cuda::detail::detail::stable_radix_sort_by_key(exec, keys_first, keys_last, values_first);
-}
-    
-  
-
-}
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator>
-void stable_primitive_sort(execution_policy<DerivedPolicy> &exec,
-                           RandomAccessIterator first,
-                           RandomAccessIterator last)
-{
-  thrust::system::cuda::detail::detail::stable_primitive_sort_detail::stable_primitive_sort(exec,first,last);
-}
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2>
-void stable_primitive_sort_by_key(execution_policy<DerivedPolicy> &exec,
-                                  RandomAccessIterator1 keys_first,
-                                  RandomAccessIterator1 keys_last,
-                                  RandomAccessIterator2 values_first)
-{
-  thrust::system::cuda::detail::detail::stable_primitive_sort_detail::stable_primitive_sort_by_key(exec, keys_first, keys_last, values_first);
-}
-
-} // end namespace detail
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cuda/detail/detail/stable_radix_sort.h b/compat/thrust/system/cuda/detail/detail/stable_radix_sort.h
deleted file mode 100644
index 7a8b9964c0..0000000000
--- a/compat/thrust/system/cuda/detail/detail/stable_radix_sort.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file stable_radix_sort_dev.h
- *  \brief Defines the interface for a stable radix sort implementation on CUDA
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cuda/detail/execution_policy.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-namespace detail
-{
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator>
-void stable_radix_sort(execution_policy<DerivedPolicy> &exec,
-                       RandomAccessIterator first,
-                       RandomAccessIterator last);
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2>
-void stable_radix_sort_by_key(execution_policy<DerivedPolicy> &exec,
-                              RandomAccessIterator1 keys_first,
-                              RandomAccessIterator1 keys_last,
-                              RandomAccessIterator2 values_first);
-
-} // end namespace detail
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/cuda/detail/detail/stable_radix_sort.inl>
-
diff --git a/compat/thrust/system/cuda/detail/detail/stable_radix_sort.inl b/compat/thrust/system/cuda/detail/detail/stable_radix_sort.inl
deleted file mode 100644
index 9ea197702c..0000000000
--- a/compat/thrust/system/cuda/detail/detail/stable_radix_sort.inl
+++ /dev/null
@@ -1,220 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-
-// do not attempt to compile this file with any other compiler
-#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC
-
-#include <thrust/detail/copy.h>
-#include <thrust/gather.h>
-#include <thrust/sequence.h>
-#include <thrust/iterator/iterator_traits.h>
-
-#include <thrust/detail/temporary_array.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/detail/util/align.h>
-#include <thrust/detail/raw_pointer_cast.h>
-
-
-__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_BEGIN
-
-
-#include <thrust/system/cuda/detail/detail/b40c/radixsort_api.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-namespace detail
-{
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator>
-void stable_radix_sort(execution_policy<DerivedPolicy> &exec,
-                       RandomAccessIterator first,
-                       RandomAccessIterator last)
-{
-    typedef typename thrust::iterator_value<RandomAccessIterator>::type K;
-    
-    unsigned int num_elements = last - first;
-
-    // ensure data is properly aligned
-    if (!thrust::detail::util::is_aligned(thrust::raw_pointer_cast(&*first), 2*sizeof(K)))
-    {
-        thrust::detail::temporary_array<K, DerivedPolicy> aligned_keys(exec, first, last);
-        stable_radix_sort(exec, aligned_keys.begin(), aligned_keys.end());
-        thrust::copy(exec, aligned_keys.begin(), aligned_keys.end(), first);
-        return;
-    }
-    
-    thrust::system::cuda::detail::detail::b40c_thrust::RadixSortingEnactor<K> sorter(num_elements);
-    thrust::system::cuda::detail::detail::b40c_thrust::RadixSortStorage<K>    storage;
-    
-    // allocate temporary buffers
-    thrust::detail::temporary_array<K,    DerivedPolicy> temp_keys(exec, num_elements);
-    thrust::detail::temporary_array<int,  DerivedPolicy> temp_spine(exec, sorter.SpineElements());
-    thrust::detail::temporary_array<bool, DerivedPolicy> temp_from_alt(exec, 2);
-
-    // define storage
-    storage.d_keys             = thrust::raw_pointer_cast(&*first);
-    storage.d_alt_keys         = thrust::raw_pointer_cast(&temp_keys[0]);
-    storage.d_spine            = thrust::raw_pointer_cast(&temp_spine[0]);
-    storage.d_from_alt_storage = thrust::raw_pointer_cast(&temp_from_alt[0]);
-
-    // perform the sort
-    sorter.EnactSort(storage);
-    
-    // radix sort sometimes leaves results in the alternate buffers
-    if (storage.using_alternate_storage)
-    {
-        thrust::copy(exec, temp_keys.begin(), temp_keys.end(), first);
-    }
-}
-
-///////////////////////
-// Key-Value Sorting //
-///////////////////////
-
-// sort values directly
-template<typename DerivedPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2>
-void stable_radix_sort_by_key(execution_policy<DerivedPolicy> &exec,
-                              RandomAccessIterator1 first1,
-                              RandomAccessIterator1 last1,
-                              RandomAccessIterator2 first2,
-                              thrust::detail::true_type)
-{
-    typedef typename thrust::iterator_value<RandomAccessIterator1>::type K;
-    typedef typename thrust::iterator_value<RandomAccessIterator2>::type V;
-    
-    unsigned int num_elements = last1 - first1;
-
-    // ensure data is properly aligned
-    if (!thrust::detail::util::is_aligned(thrust::raw_pointer_cast(&*first1), 2*sizeof(K)))
-    {
-        thrust::detail::temporary_array<K,DerivedPolicy> aligned_keys(exec, first1, last1);
-        stable_radix_sort_by_key(exec, aligned_keys.begin(), aligned_keys.end(), first2);
-        thrust::copy(exec, aligned_keys.begin(), aligned_keys.end(), first1);
-        return;
-    }
-    if (!thrust::detail::util::is_aligned(thrust::raw_pointer_cast(&*first2), 2*sizeof(V)))
-    {
-        thrust::detail::temporary_array<V,DerivedPolicy> aligned_values(exec, first2, first2 + num_elements);
-        stable_radix_sort_by_key(exec, first1, last1, aligned_values.begin());
-        thrust::copy(exec, aligned_values.begin(), aligned_values.end(), first2);
-        return;
-    }
-   
-    thrust::system::cuda::detail::detail::b40c_thrust::RadixSortingEnactor<K,V> sorter(num_elements);
-    thrust::system::cuda::detail::detail::b40c_thrust::RadixSortStorage<K,V>    storage;
-    
-    // allocate temporary buffers
-    thrust::detail::temporary_array<K,    DerivedPolicy> temp_keys(exec, num_elements);
-    thrust::detail::temporary_array<V,    DerivedPolicy> temp_values(exec, num_elements);
-    thrust::detail::temporary_array<int,  DerivedPolicy> temp_spine(exec, sorter.SpineElements());
-    thrust::detail::temporary_array<bool, DerivedPolicy> temp_from_alt(exec, 2);
-
-    // define storage
-    storage.d_keys             = thrust::raw_pointer_cast(&*first1);
-    storage.d_values           = thrust::raw_pointer_cast(&*first2);
-    storage.d_alt_keys         = thrust::raw_pointer_cast(&temp_keys[0]);
-    storage.d_alt_values       = thrust::raw_pointer_cast(&temp_values[0]);
-    storage.d_spine            = thrust::raw_pointer_cast(&temp_spine[0]);
-    storage.d_from_alt_storage = thrust::raw_pointer_cast(&temp_from_alt[0]);
-
-    // perform the sort
-    sorter.EnactSort(storage);
-    
-    // radix sort sometimes leaves results in the alternate buffers
-    if (storage.using_alternate_storage)
-    {
-        thrust::copy(exec, temp_keys.begin(),   temp_keys.end(),   first1);
-        thrust::copy(exec, temp_values.begin(), temp_values.end(), first2);
-    }
-}
-
-
-// sort values indirectly
-template<typename DerivedPolicy, 
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2>
-void stable_radix_sort_by_key(execution_policy<DerivedPolicy> &exec,
-                              RandomAccessIterator1 first1,
-                              RandomAccessIterator1 last1,
-                              RandomAccessIterator2 first2,
-                              thrust::detail::false_type)
-{
-    typedef typename thrust::iterator_value<RandomAccessIterator2>::type V;
-    
-    unsigned int num_elements = last1 - first1;
-
-    // sort with integer values and then permute the real values accordingly
-    thrust::detail::temporary_array<unsigned int,DerivedPolicy> permutation(exec, num_elements);
-    thrust::sequence(exec, permutation.begin(), permutation.end());
-
-    stable_radix_sort_by_key(exec, first1, last1, permutation.begin());
-    
-    // copy values into temp vector and then permute
-    thrust::detail::temporary_array<V,DerivedPolicy> temp_values(exec, first2, first2 + num_elements);
-   
-    // permute values
-    thrust::gather(exec,
-                   permutation.begin(), permutation.end(),
-                   temp_values.begin(),
-                   first2);
-}
-
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2>
-void stable_radix_sort_by_key(execution_policy<DerivedPolicy> &exec,
-                              RandomAccessIterator1 first1,
-                              RandomAccessIterator1 last1,
-                              RandomAccessIterator2 first2)
-{
-    typedef typename thrust::iterator_value<RandomAccessIterator2>::type V;
-
-    // decide how to handle values
-    static const bool sort_values_directly = thrust::detail::is_trivial_iterator<RandomAccessIterator2>::value &&
-                                             thrust::detail::is_arithmetic<V>::value &&
-                                             sizeof(V) <= 8;    // TODO profile this
-
-    // XXX WAR unused variable warning
-    (void) sort_values_directly;
-
-    stable_radix_sort_by_key(exec, first1, last1, first2, 
-                             thrust::detail::integral_constant<bool, sort_values_directly>());
-}
-
-} // end namespace detail
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
-
-__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_END
-
-
-#endif // THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC
-
diff --git a/compat/thrust/system/cuda/detail/detail/stable_sort_by_count.h b/compat/thrust/system/cuda/detail/detail/stable_sort_by_count.h
deleted file mode 100644
index b563654895..0000000000
--- a/compat/thrust/system/cuda/detail/detail/stable_sort_by_count.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cuda/detail/execution_policy.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-namespace detail
-{
-
-
-template<unsigned int count,
-         typename DerivedPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename Compare>
-void stable_sort_by_count(execution_policy<DerivedPolicy> &exec,
-                          RandomAccessIterator1 keys_first,
-                          RandomAccessIterator1 keys_last,
-                          RandomAccessIterator2 values_first,
-                          Compare comp);
-
-
-} // end detail
-} // end detail
-} // end cuda
-} // end system
-} // end thrust
-
-#include <thrust/system/cuda/detail/detail/stable_sort_by_count.inl>
-
diff --git a/compat/thrust/system/cuda/detail/detail/stable_sort_by_count.inl b/compat/thrust/system/cuda/detail/detail/stable_sort_by_count.inl
deleted file mode 100644
index 5efb36b9b0..0000000000
--- a/compat/thrust/system/cuda/detail/detail/stable_sort_by_count.inl
+++ /dev/null
@@ -1,179 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cuda/detail/detail/stable_sort_by_count.h>
-#include <thrust/system/cuda/detail/detail/launch_closure.h>
-#include <thrust/system/cuda/detail/detail/uninitialized.h>
-#include <thrust/system/cuda/detail/block/merging_sort.h>
-#include <thrust/detail/minmax.h>
-#include <thrust/system/cuda/detail/runtime_introspection.h>
-
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-namespace detail
-{
-namespace stable_sort_by_count_detail
-{
-
-
-template<unsigned int block_size,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename StrictWeakOrdering,
-         typename Context>
-struct stable_sort_by_count_closure
-{
-  typedef Context context_type;
-
-  RandomAccessIterator1 keys_first;
-  RandomAccessIterator2 values_first;
-  StrictWeakOrdering comp; // XXX this should probably be thrust::detail::device_function
-  const unsigned int n;
-  context_type context;
-
-  stable_sort_by_count_closure(RandomAccessIterator1 keys_first,
-                               RandomAccessIterator2 values_first,
-                               StrictWeakOrdering comp,
-                               const unsigned int n,
-                               context_type context = context_type())
-    : keys_first(keys_first),
-      values_first(values_first),
-      comp(comp),
-      n(n),
-      context(context)
-  {}
-
-  __device__ __thrust_forceinline__
-  void operator()(void)
-  {
-    typedef typename iterator_value<RandomAccessIterator1>::type KeyType;
-    typedef typename iterator_value<RandomAccessIterator2>::type ValueType;
-  
-    __shared__ uninitialized_array<KeyType,block_size>   s_keys;
-    __shared__ uninitialized_array<ValueType,block_size> s_data;
-  
-    const unsigned int grid_size = context.grid_dimension() * context.block_dimension();
-  
-    // block_offset records the global index of this block's 0th thread
-    unsigned int block_offset = context.block_index() * block_size;
-    unsigned int i = context.thread_index() + block_offset;
-  
-    // advance iterators
-    keys_first   += i;
-    values_first += i;
-  
-    for(;
-        block_offset < n;
-        block_offset += grid_size, i += grid_size, keys_first += grid_size, values_first += grid_size)
-    {
-      context.barrier();
-      // copy input to shared
-      if(i < n)
-      {
-        s_keys[context.thread_index()] = *keys_first;
-        s_data[context.thread_index()] = *values_first;
-      } // end if
-      context.barrier();
-  
-      // this block could be partially full
-      unsigned int length = block_size;
-      if(block_offset + block_size > n)
-      {
-        length = n - block_offset;
-      } // end if
-  
-      // run merge_sort over the block
-      block::merging_sort(context, s_keys.begin(), s_data.begin(), length, comp);
-  
-      // write result
-      if(i < n)
-      {
-        *keys_first   = s_keys[context.thread_index()];
-        *values_first = s_data[context.thread_index()];
-      } // end if
-    } // end for i
-  }
-
-
-  static size_t max_grid_size()
-  {
-    const device_properties_t& properties = device_properties();
-
-    const unsigned int max_threads = properties.maxThreadsPerMultiProcessor * properties.multiProcessorCount;
-    const unsigned int max_blocks  = properties.maxGridSize[0];
-    
-    return thrust::min<size_t>(max_blocks, 3 * max_threads / block_size);
-  } // end max_grid_size()
-
-
-  size_t grid_size() const
-  {
-    // compute the maximum number of blocks we can launch on this arch
-    const unsigned int max_num_blocks = max_grid_size();
-
-    // first, sort within each block
-    size_t num_blocks = n / block_size;
-    if(n % block_size) ++num_blocks;
-
-    return thrust::min<size_t>(num_blocks, max_num_blocks);
-  } // end grid_size()
-}; // stable_sort_by_count_closure
-
-
-} // end stable_sort_by_count_detail
-
-
-template<unsigned int count,
-         typename DerivedPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename Compare>
-void stable_sort_by_count(execution_policy<DerivedPolicy> &,
-                          RandomAccessIterator1 keys_first,
-                          RandomAccessIterator1 keys_last,
-                          RandomAccessIterator2 values_first,
-                          Compare comp)
-{
-  typedef stable_sort_by_count_detail::stable_sort_by_count_closure<
-    count,
-    RandomAccessIterator1,
-    RandomAccessIterator2,
-    Compare,
-    detail::statically_blocked_thread_array<count>
-  > Closure;
-
-  Closure closure(keys_first, values_first, comp, keys_last - keys_first);
- 
-  // do an odd-even sort per block of data
-  detail::launch_closure(closure, closure.grid_size(), count);
-} // end stable_sort_by_count()
-
-
-} // end detail
-} // end detail
-} // end cuda
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/cuda/detail/detail/uninitialized.h b/compat/thrust/system/cuda/detail/detail/uninitialized.h
deleted file mode 100644
index a3e3dd2e76..0000000000
--- a/compat/thrust/system/cuda/detail/detail/uninitialized.h
+++ /dev/null
@@ -1,261 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cuda/detail/detail/alignment.h>
-#include <cstddef>
-#include <new>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-namespace detail
-{
-
-
-template<typename T>
-  class uninitialized
-{
-  private:
-    typename aligned_storage<
-      sizeof(T),
-      alignment_of<T>::value
-    >::type storage;
-
-    __device__ __thrust_forceinline__ const T* ptr() const
-    {
-      return reinterpret_cast<const T*>(storage.data);
-    }
-
-    __device__ __thrust_forceinline__ T* ptr()
-    {
-      return reinterpret_cast<T*>(storage.data);
-    }
-
-  public:
-    // copy assignment
-    __device__ __thrust_forceinline__ uninitialized<T> &operator=(const T &other)
-    {
-      T& self = *this;
-      self = other;
-      return *this;
-    }
-
-    __device__ __thrust_forceinline__ T& get()
-    {
-      return *ptr();
-    }
-
-    __device__ __thrust_forceinline__ const T& get() const
-    {
-      return *ptr();
-    }
-
-    __device__ __thrust_forceinline__ operator T& ()
-    {
-      return get();
-    }
-
-    __device__ __thrust_forceinline__ operator const T&() const
-    {
-      return get();
-    }
-
-    __thrust_forceinline__ __device__ void construct()
-    {
-      ::new(ptr()) T();
-    }
-
-    template<typename Arg>
-    __thrust_forceinline__ __device__ void construct(const Arg &a)
-    {
-      ::new(ptr()) T(a);
-    }
-
-    template<typename Arg1, typename Arg2>
-    __thrust_forceinline__ __device__ void construct(const Arg1 &a1, const Arg2 &a2)
-    {
-      ::new(ptr()) T(a1,a2);
-    }
-
-    template<typename Arg1, typename Arg2, typename Arg3>
-    __thrust_forceinline__ __device__ void construct(const Arg1 &a1, const Arg2 &a2, const Arg3 &a3)
-    {
-      ::new(ptr()) T(a1,a2,a3);
-    }
-
-    template<typename Arg1, typename Arg2, typename Arg3, typename Arg4>
-    __thrust_forceinline__ __device__ void construct(const Arg1 &a1, const Arg2 &a2, const Arg3 &a3, const Arg4 &a4)
-    {
-      ::new(ptr()) T(a1,a2,a3,a4);
-    }
-
-    template<typename Arg1, typename Arg2, typename Arg3, typename Arg4, typename Arg5>
-    __thrust_forceinline__ __device__ void construct(const Arg1 &a1, const Arg2 &a2, const Arg3 &a3, const Arg4 &a4, const Arg5 &a5)
-    {
-      ::new(ptr()) T(a1,a2,a3,a4,a5);
-    }
-
-    template<typename Arg1, typename Arg2, typename Arg3, typename Arg4, typename Arg5, typename Arg6>
-    __thrust_forceinline__ __device__ void construct(const Arg1 &a1, const Arg2 &a2, const Arg3 &a3, const Arg4 &a4, const Arg5 &a5, const Arg6 &a6)
-    {
-      ::new(ptr()) T(a1,a2,a3,a4,a5,a6);
-    }
-
-    template<typename Arg1, typename Arg2, typename Arg3, typename Arg4, typename Arg5, typename Arg6, typename Arg7>
-    __thrust_forceinline__ __device__ void construct(const Arg1 &a1, const Arg2 &a2, const Arg3 &a3, const Arg4 &a4, const Arg5 &a5, const Arg6 &a6, const Arg7 &a7)
-    {
-      ::new(ptr()) T(a1,a2,a3,a4,a5,a6,a7);
-    }
-
-    template<typename Arg1, typename Arg2, typename Arg3, typename Arg4, typename Arg5, typename Arg6, typename Arg7, typename Arg8>
-    __thrust_forceinline__ __device__ void construct(const Arg1 &a1, const Arg2 &a2, const Arg3 &a3, const Arg4 &a4, const Arg5 &a5, const Arg6 &a6, const Arg7 &a7, const Arg8 &a8)
-    {
-      ::new(ptr()) T(a1,a2,a3,a4,a5,a6,a7,a8);
-    }
-
-    template<typename Arg1, typename Arg2, typename Arg3, typename Arg4, typename Arg5, typename Arg6, typename Arg7, typename Arg8, typename Arg9>
-    __thrust_forceinline__ __device__ void construct(const Arg1 &a1, const Arg2 &a2, const Arg3 &a3, const Arg4 &a4, const Arg5 &a5, const Arg6 &a6, const Arg7 &a7, const Arg8 &a8, const Arg9 &a9)
-    {
-      ::new(ptr()) T(a1,a2,a3,a4,a5,a6,a7,a8,a9);
-    }
-
-    template<typename Arg1, typename Arg2, typename Arg3, typename Arg4, typename Arg5, typename Arg6, typename Arg7, typename Arg8, typename Arg9, typename Arg10>
-    __thrust_forceinline__ __device__ void construct(const Arg1 &a1, const Arg2 &a2, const Arg3 &a3, const Arg4 &a4, const Arg5 &a5, const Arg6 &a6, const Arg7 &a7, const Arg8 &a8, const Arg9 &a9, const Arg10 &a10)
-    {
-      ::new(ptr()) T(a1,a2,a3,a4,a5,a6,a7,a8,a9,a10);
-    }
-
-    __thrust_forceinline__ __device__ void destroy()
-    {
-      T& self = *this;
-      self.~T();
-    }
-};
-
-
-template<typename T, std::size_t N>
-  class uninitialized_array
-{
-  public:
-    typedef T             value_type; 
-    typedef T&            reference;
-    typedef const T&      const_reference;
-    typedef T*            pointer;
-    typedef const T*      const_pointer;
-    typedef pointer       iterator;
-    typedef const_pointer const_iterator;
-    typedef std::size_t   size_type;
-
-    __thrust_forceinline__ __device__ iterator begin()
-    {
-      return data();
-    }
-
-    __thrust_forceinline__ __device__ const_iterator begin() const
-    {
-      return data();
-    }
-
-    __thrust_forceinline__ __device__ iterator end()
-    {
-      return begin() + size();
-    }
-
-    __thrust_forceinline__ __device__ const_iterator end() const
-    {
-      return begin() + size();
-    }
-
-    __thrust_forceinline__ __device__ const_iterator cbegin() const
-    {
-      return begin();
-    }
-
-    __thrust_forceinline__ __device__ const_iterator cend() const
-    {
-      return end();
-    }
-
-    __thrust_forceinline__ __device__ size_type size() const
-    {
-      return N;
-    }
-
-    __thrust_forceinline__ __device__ bool empty() const
-    {
-      return false;
-    }
-
-    __thrust_forceinline__ __device__ T* data()
-    {
-      return impl.get();
-    }
-
-    __thrust_forceinline__ __device__ const T* data() const
-    {
-      return impl.get();
-    }
-
-    // element access
-    __thrust_forceinline__ __device__ reference operator[](size_type n)
-    {
-      return data()[n];
-    }
-
-    __thrust_forceinline__ __device__ const_reference operator[](size_type n) const
-    {
-      return data()[n];
-    }
-
-    __thrust_forceinline__ __device__ reference front()
-    {
-      return *data();
-    }
-
-    __thrust_forceinline__ __device__ const_reference front() const
-    {
-      return *data();
-    }
-
-    __thrust_forceinline__ __device__ reference back()
-    {
-      return data()[size() - size_type(1)];
-    }
-
-    __thrust_forceinline__ __device__ const_reference back() const
-    {
-      return data()[size() - size_type(1)];
-    }
-
-  private:
-    uninitialized<T[N]> impl;
-};
-
-
-} // end detail
-} // end detail
-} // end cuda
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/cuda/detail/equal.h b/compat/thrust/system/cuda/detail/equal.h
deleted file mode 100644
index a307fc5fc8..0000000000
--- a/compat/thrust/system/cuda/detail/equal.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system has no special version of this algorithm 
-
diff --git a/compat/thrust/system/cuda/detail/error.inl b/compat/thrust/system/cuda/detail/error.inl
deleted file mode 100644
index 41b928fa32..0000000000
--- a/compat/thrust/system/cuda/detail/error.inl
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#pragma once
-
-#include <thrust/system/cuda/error.h>
-#include <thrust/system/cuda/detail/guarded_cuda_runtime_api.h>
-
-namespace thrust
-{
-
-namespace system
-{
-
-
-error_code make_error_code(cuda::errc::errc_t e)
-{
-  return error_code(static_cast<int>(e), cuda_category());
-} // end make_error_code()
-
-
-error_condition make_error_condition(cuda::errc::errc_t e)
-{
-  return error_condition(static_cast<int>(e), cuda_category());
-} // end make_error_condition()
-
-
-namespace cuda
-{
-
-namespace detail
-{
-
-
-class cuda_error_category
-  : public error_category
-{
-  public:
-    inline cuda_error_category(void) {}
-
-    inline virtual const char *name(void) const
-    {
-      return "cuda";
-    }
-
-    inline virtual std::string message(int ev) const
-    {
-      static const std::string unknown_err("Unknown error");
-      const char *c_str = ::cudaGetErrorString(static_cast<cudaError_t>(ev));
-      return c_str ? std::string(c_str) : unknown_err;
-    }
-
-    inline virtual error_condition default_error_condition(int ev) const
-    {
-      using namespace cuda::errc;
-
-      if(ev < ::cudaErrorApiFailureBase)
-      {
-        return make_error_condition(static_cast<errc_t>(ev));
-      }
-
-      return system_category().default_error_condition(ev);
-    }
-}; // end cuda_error_category
-
-} // end detail
-
-} // end namespace cuda
-
-
-const error_category &cuda_category(void)
-{
-  static const cuda::detail::cuda_error_category result;
-  return result;
-}
-
-
-} // end namespace system
-
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cuda/detail/execution_policy.h b/compat/thrust/system/cuda/detail/execution_policy.h
deleted file mode 100644
index 7dae04c1eb..0000000000
--- a/compat/thrust/system/cuda/detail/execution_policy.h
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/execution_policy.h>
-#include <thrust/system/cpp/detail/execution_policy.h>
-#include <thrust/iterator/detail/any_system_tag.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-// put the canonical tag in the same ns as the backend's entry points
-namespace detail
-{
-
-// this awkward sequence of definitions arise
-// from the desire both for tag to derive
-// from execution_policy and for execution_policy
-// to convert to tag (when execution_policy is not
-// an ancestor of tag)
-
-// forward declaration of tag
-struct tag;
-
-// forward declaration of execution_policy
-template<typename> struct execution_policy;
-
-// specialize execution_policy for tag
-template<>
-  struct execution_policy<tag>
-    : thrust::execution_policy<tag>
-{};
-
-// tag's definition comes before the
-// generic definition of execution_policy
-struct tag : execution_policy<tag> {};
-
-// allow conversion to tag when it is not a successor
-template<typename Derived>
-  struct execution_policy
-    : thrust::execution_policy<Derived>
-{
-  // allow conversion to tag
-  inline operator tag () const
-  {
-    return tag();
-  }
-};
-
-
-template<typename System1, typename System2>
-  struct cross_system
-    : thrust::execution_policy<cross_system<System1,System2> >
-{
-  inline __host__ __device__
-  cross_system(thrust::execution_policy<System1> &system1,
-               thrust::execution_policy<System2> &system2)
-    : system1(system1), system2(system2)
-  {}
-
-  thrust::execution_policy<System1> &system1;
-  thrust::execution_policy<System2> &system2;
-
-  inline __host__ __device__
-  cross_system<System2,System1> rotate() const
-  {
-    return cross_system<System2,System1>(system2,system1);
-  }
-};
-
-
-// overloads of select_system
-
-// cpp interop
-template<typename System1, typename System2>
-inline __host__ __device__
-cross_system<System1,System2> select_system(const execution_policy<System1> &system1, const thrust::cpp::execution_policy<System2> &system2)
-{
-  thrust::execution_policy<System1> &non_const_system1 = const_cast<execution_policy<System1>&>(system1);
-  thrust::cpp::execution_policy<System2> &non_const_system2 = const_cast<thrust::cpp::execution_policy<System2>&>(system2);
-  return cross_system<System1,System2>(non_const_system1,non_const_system2);
-}
-
-
-template<typename System1, typename System2>
-inline __host__ __device__
-cross_system<System1,System2> select_system(const thrust::cpp::execution_policy<System1> &system1, execution_policy<System2> &system2)
-{
-  thrust::cpp::execution_policy<System1> &non_const_system1 = const_cast<thrust::cpp::execution_policy<System1>&>(system1);
-  thrust::execution_policy<System2> &non_const_system2 = const_cast<execution_policy<System2>&>(system2);
-  return cross_system<System1,System2>(non_const_system1,non_const_system2);
-}
-
-
-} // end detail
-
-// alias execution_policy and tag here
-using thrust::system::cuda::detail::execution_policy;
-using thrust::system::cuda::detail::tag;
-
-} // end cuda
-} // end system
-
-// alias items at top-level
-namespace cuda
-{
-
-using thrust::system::cuda::execution_policy;
-using thrust::system::cuda::tag;
-
-} // end cuda
-} // end thrust
-
diff --git a/compat/thrust/system/cuda/detail/extern_shared_ptr.h b/compat/thrust/system/cuda/detail/extern_shared_ptr.h
deleted file mode 100644
index 5f34cc89ba..0000000000
--- a/compat/thrust/system/cuda/detail/extern_shared_ptr.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-
-template<typename T>
-  class extern_shared_ptr
-{
-// don't attempt to compile with any compiler other than nvcc
-// due to use of __shared__ below
-#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC
-  public:
-    __device__
-    inline operator T * (void)
-    {
-      extern __shared__ int4 smem[];
-      return reinterpret_cast<T*>(smem);
-    }
-
-    __device__
-    inline operator const T * (void) const
-    {
-      extern __shared__ int4 smem[];
-      return reinterpret_cast<const T*>(smem);
-    }
-#endif // THRUST_DEVICE_COMPILER_NVCC
-}; // end extern_shared_ptr
-
-} // end detail
-} // end cuda
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/cuda/detail/extrema.h b/compat/thrust/system/cuda/detail/extrema.h
deleted file mode 100644
index a307fc5fc8..0000000000
--- a/compat/thrust/system/cuda/detail/extrema.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system has no special version of this algorithm 
-
diff --git a/compat/thrust/system/cuda/detail/fill.h b/compat/thrust/system/cuda/detail/fill.h
deleted file mode 100644
index 9c753bb9aa..0000000000
--- a/compat/thrust/system/cuda/detail/fill.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file fill.h
- *  \brief Device implementation of fill.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cuda/detail/execution_policy.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-
-template<typename DerivedPolicy, typename ForwardIterator, typename T>
-  void fill(execution_policy<DerivedPolicy> &exec,
-            ForwardIterator first,
-            ForwardIterator last,
-            const T &value);
-
-template<typename DerivedPolicy, typename OutputIterator, typename Size, typename T>
-  OutputIterator fill_n(execution_policy<DerivedPolicy> &exec,
-                        OutputIterator first,
-                        Size n,
-                        const T &value);
-
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/cuda/detail/fill.inl>
-
diff --git a/compat/thrust/system/cuda/detail/fill.inl b/compat/thrust/system/cuda/detail/fill.inl
deleted file mode 100644
index 3c1feb8ac8..0000000000
--- a/compat/thrust/system/cuda/detail/fill.inl
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file fill.inl
- *  \brief Inline file for fill.h.
- */
-
-#include <thrust/detail/config.h>
-
-#include <thrust/distance.h>
-#include <thrust/detail/util/align.h>
-#include <thrust/generate.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/detail/raw_pointer_cast.h>
-#include <thrust/detail/minmax.h>
-#include <thrust/detail/internal_functional.h>
-
-#include <thrust/system/cuda/detail/runtime_introspection.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-namespace detail
-{
-
-
-template<typename WidePtr, typename T>
-  WidePtr widen_raw_ptr(T *ptr)
-{
-  typedef thrust::detail::pointer_traits<WidePtr> WideTraits;
-  typedef typename WideTraits::element_type       WideT;
-
-  // carefully widen the pointer to avoid warnings about conversions between differently aligned types on ARM
-  WideT *wide_raw_ptr = static_cast<WideT*>(static_cast<void*>(ptr));
-
-  return WideTraits::pointer_to(*wide_raw_ptr);
-}
-
-
-template<typename WideType, typename DerivedPolicy, typename Pointer, typename Size, typename T>
-  Pointer wide_fill_n(execution_policy<DerivedPolicy> &exec,
-                      Pointer first,
-                      Size n,
-                      const T &value)
-{
-  typedef typename thrust::iterator_value<Pointer>::type OutputType;
-
-  size_t ALIGNMENT_BOUNDARY = 128; // begin copying blocks at this byte boundary
-
-  WideType   wide_exemplar;
-  OutputType narrow_exemplars[sizeof(WideType) / sizeof(OutputType)];
-
-  for (size_t i = 0; i < sizeof(WideType) / sizeof(OutputType); i++)
-      narrow_exemplars[i] = static_cast<OutputType>(value);
-
-  // cast through char * to avoid type punning warnings
-  for (size_t i = 0; i < sizeof(WideType); i++)
-      reinterpret_cast<char *>(&wide_exemplar)[i] = reinterpret_cast<char *>(narrow_exemplars)[i];
-
-  OutputType *first_raw = thrust::raw_pointer_cast(first);
-  OutputType *last_raw  = first_raw + n;
-
-  OutputType *block_first_raw = (thrust::min)(first_raw + n,   thrust::detail::util::align_up(first_raw, ALIGNMENT_BOUNDARY));
-  OutputType *block_last_raw  = (thrust::max)(block_first_raw, thrust::detail::util::align_down(last_raw, sizeof(WideType)));
-
-  // rebind Pointer to WideType
-  typedef typename thrust::detail::rebind_pointer<Pointer,WideType>::type WidePtr;
-
-  // point to the widened range
-  // XXX since we've got an execution policy, we probably don't even need to deal with rebinding pointers
-  WidePtr block_first_wide = widen_raw_ptr<WidePtr>(block_first_raw);
-  WidePtr block_last_wide  = widen_raw_ptr<WidePtr>(block_last_raw);
-
-  thrust::generate(exec, first,                   Pointer(block_first_raw),    thrust::detail::fill_functor<OutputType>(value));
-  thrust::generate(exec, block_first_wide,        block_last_wide,             thrust::detail::fill_functor<WideType>(wide_exemplar));
-  thrust::generate(exec, Pointer(block_last_raw), first + n,                   thrust::detail::fill_functor<OutputType>(value));
-
-  return first + n;
-}
-
-template<typename DerivedPolicy, typename OutputIterator, typename Size, typename T>
-  OutputIterator fill_n(execution_policy<DerivedPolicy> &exec,
-                        OutputIterator first,
-                        Size n,
-                        const T &value,
-                        thrust::detail::false_type)
-{
-  thrust::detail::fill_functor<T> func(value); 
-  return thrust::generate_n(exec, first, n, func);
-}
-
-template<typename DerivedPolicy, typename OutputIterator, typename Size, typename T>
-  OutputIterator fill_n(execution_policy<DerivedPolicy> &exec,
-                        OutputIterator first,
-                        Size n,
-                        const T &value,
-                        thrust::detail::true_type)
-{
-  typedef typename thrust::iterator_traits<OutputIterator>::value_type OutputType;
-  
-  if ( thrust::detail::util::is_aligned<OutputType>(thrust::raw_pointer_cast(&*first)) )
-  {
-      if (compute_capability() < 20)
-      {
-        // 32-bit writes are faster on G80 and GT200
-        typedef unsigned int WideType;
-        wide_fill_n<WideType>(exec, &*first, n, value);
-      }
-      else
-      {
-        // 64-bit writes are faster on Fermi
-        typedef unsigned long long WideType;
-        wide_fill_n<WideType>(exec, &*first, n, value);
-      }
-
-      return first + n;
-  }
-  else
-  {
-    return fill_n(exec, first, n, value, thrust::detail::false_type());
-  }
-}
-
-} // end detail
-
-template<typename DerivedPolicy, typename OutputIterator, typename Size, typename T>
-  OutputIterator fill_n(execution_policy<DerivedPolicy> &exec,
-                        OutputIterator first,
-                        Size n,
-                        const T &value)
-{
-  typedef typename thrust::iterator_traits<OutputIterator>::value_type      OutputType;
-
-  // we're compiling with nvcc, launch a kernel
-  const bool use_wide_fill = thrust::detail::is_trivial_iterator<OutputIterator>::value
-      && thrust::detail::has_trivial_assign<OutputType>::value
-      && (sizeof(OutputType) == 1 || sizeof(OutputType) == 2 || sizeof(OutputType) == 4);
-
-  // XXX WAR usused variable warning
-  (void)use_wide_fill;
-
-  return detail::fill_n(exec, first, n, value, thrust::detail::integral_constant<bool, use_wide_fill>());
-}
-
-template<typename DerivedPolicy, typename ForwardIterator, typename T>
-  void fill(execution_policy<DerivedPolicy> &exec,
-            ForwardIterator first,
-            ForwardIterator last,
-            const T &value)
-{
-  thrust::system::cuda::detail::fill_n(exec, first, thrust::distance(first,last), value);
-} // end fill()
-
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cuda/detail/find.h b/compat/thrust/system/cuda/detail/find.h
deleted file mode 100644
index a307fc5fc8..0000000000
--- a/compat/thrust/system/cuda/detail/find.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system has no special version of this algorithm 
-
diff --git a/compat/thrust/system/cuda/detail/for_each.h b/compat/thrust/system/cuda/detail/for_each.h
deleted file mode 100644
index 56be13b177..0000000000
--- a/compat/thrust/system/cuda/detail/for_each.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file for_each.h
- *  \brief Defines the interface for a function that executes a 
- *  function or functional for each value in a given range.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cuda/detail/execution_policy.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator,
-         typename UnaryFunction>
-  RandomAccessIterator for_each(execution_policy<DerivedPolicy> &s,
-                                RandomAccessIterator first,
-                                RandomAccessIterator last,
-                                UnaryFunction f);
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator,
-         typename Size,
-         typename UnaryFunction>
-  RandomAccessIterator for_each_n(execution_policy<DerivedPolicy> &s,
-                                  RandomAccessIterator first,
-                                  Size n,
-                                  UnaryFunction f);
-
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/cuda/detail/for_each.inl>
-
diff --git a/compat/thrust/system/cuda/detail/for_each.inl b/compat/thrust/system/cuda/detail/for_each.inl
deleted file mode 100644
index be6e56131f..0000000000
--- a/compat/thrust/system/cuda/detail/for_each.inl
+++ /dev/null
@@ -1,199 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file for_each.inl
- *  \brief Inline file for for_each.h.
- */
-
-#include <thrust/detail/config.h>
-
-#include <thrust/detail/minmax.h>
-#include <thrust/detail/static_assert.h>
-
-#include <thrust/distance.h>
-#include <thrust/for_each.h>
-#include <thrust/system/cuda/detail/detail/launch_closure.h>
-#include <thrust/system/cuda/detail/detail/launch_calculator.h>
-#include <thrust/detail/util/blocking.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/detail/function.h>
-
-#include <limits>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-namespace for_each_n_detail
-{
-
-
-template<typename RandomAccessIterator,
-         typename Size,
-         typename UnaryFunction,
-         typename Context>
-struct for_each_n_closure
-{
-  typedef void result_type;
-  typedef Context context_type;
-
-  RandomAccessIterator first;
-  Size n;
-  thrust::detail::device_function<UnaryFunction,void> f;
-  Context context;
-
-  for_each_n_closure(RandomAccessIterator first,
-                     Size n,
-                     UnaryFunction f,
-                     Context context = Context())
-    : first(first), n(n), f(f), context(context)
-  {}
-
-  __device__ __thrust_forceinline__
-  result_type operator()(void)
-  {
-    const Size grid_size = context.block_dimension() * context.grid_dimension();
-
-    Size i = context.linear_index();
-
-    // advance iterator
-    first += i;
-
-    while(i < n)
-    {
-      f(*first);
-      i += grid_size;
-      first += grid_size;
-    }
-  }
-}; // end for_each_n_closure
-
-
-template<typename Closure, typename Size>
-thrust::tuple<size_t,size_t> configure_launch(Size n)
-{
-  // calculate launch configuration
-  detail::launch_calculator<Closure> calculator;
-  
-  thrust::tuple<size_t, size_t, size_t> config = calculator.with_variable_block_size();
-  size_t max_blocks = thrust::get<0>(config);
-  size_t block_size = thrust::get<1>(config);
-  size_t num_blocks = thrust::min(max_blocks, thrust::detail::util::divide_ri<size_t>(n, block_size));
-
-  return thrust::make_tuple(num_blocks, block_size);
-}
-
-
-template<typename Size>
-bool use_big_closure(Size n, unsigned int little_grid_size)
-{
-  // use the big closure when n will not fit within an unsigned int
-  // or if incrementing an unsigned int by little_grid_size would overflow
-  // the counter
-  
-  Size threshold = std::numeric_limits<unsigned int>::max();
-
-  bool result = (sizeof(Size) > sizeof(unsigned int)) && (n > threshold);
-
-  if(!result)
-  {
-    // check if we'd overflow the little closure's counter
-    unsigned int little_n = static_cast<unsigned int>(n);
-
-    if((little_n - 1u) + little_grid_size < little_n)
-    {
-      result = true;
-    }
-  }
-
-  return result;
-}
-
-
-} // end for_each_n_detail
-
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator,
-         typename Size,
-         typename UnaryFunction>
-RandomAccessIterator for_each_n(execution_policy<DerivedPolicy> &,
-                                RandomAccessIterator first,
-                                Size n,
-                                UnaryFunction f)
-{
-  // we're attempting to launch a kernel, assert we're compiling with nvcc
-  // ========================================================================
-  // X Note to the user: If you've found this line due to a compiler error, X
-  // X you need to compile your code using nvcc, rather than g++ or cl.exe  X
-  // ========================================================================
-  THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation<RandomAccessIterator, THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC>::value) );
-
-  if(n <= 0) return first;  // empty range
-  
-  // create two candidate closures to implement the for_each
-  // choose between them based on the whether we can fit n into a smaller integer
-  // and whether or not we'll overflow the closure's counter
-
-  typedef detail::blocked_thread_array Context;
-  typedef for_each_n_detail::for_each_n_closure<RandomAccessIterator, Size, UnaryFunction, Context>         BigClosure;
-  typedef for_each_n_detail::for_each_n_closure<RandomAccessIterator, unsigned int, UnaryFunction, Context> LittleClosure;
-
-  BigClosure    big_closure(first, n, f);
-  LittleClosure little_closure(first, static_cast<unsigned int>(n), f);
-
-  thrust::tuple<size_t, size_t> little_config = for_each_n_detail::configure_launch<LittleClosure>(n);
-
-  unsigned int little_grid_size = thrust::get<0>(little_config) * thrust::get<1>(little_config);
-
-  if(for_each_n_detail::use_big_closure(n, little_grid_size))
-  {
-    // launch the big closure
-    thrust::tuple<size_t, size_t> big_config = for_each_n_detail::configure_launch<BigClosure>(n);
-    detail::launch_closure(big_closure, thrust::get<0>(big_config), thrust::get<1>(big_config));
-  }
-  else
-  {
-    // launch the little closure
-    detail::launch_closure(little_closure, thrust::get<0>(little_config), thrust::get<1>(little_config));
-  }
-
-  return first + n;
-} 
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename UnaryFunction>
-  InputIterator for_each(execution_policy<DerivedPolicy> &exec,
-                         InputIterator first,
-                         InputIterator last,
-                         UnaryFunction f)
-{
-  return cuda::detail::for_each_n(exec, first, thrust::distance(first,last), f);
-} // end for_each()
-
-
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cuda/detail/gather.h b/compat/thrust/system/cuda/detail/gather.h
deleted file mode 100644
index a307fc5fc8..0000000000
--- a/compat/thrust/system/cuda/detail/gather.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system has no special version of this algorithm 
-
diff --git a/compat/thrust/system/cuda/detail/generate.h b/compat/thrust/system/cuda/detail/generate.h
deleted file mode 100644
index a307fc5fc8..0000000000
--- a/compat/thrust/system/cuda/detail/generate.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system has no special version of this algorithm 
-
diff --git a/compat/thrust/system/cuda/detail/get_value.h b/compat/thrust/system/cuda/detail/get_value.h
deleted file mode 100644
index 273023f612..0000000000
--- a/compat/thrust/system/cuda/detail/get_value.h
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cuda/detail/execution_policy.h>
-#include <thrust/system/cuda/detail/assign_value.h>
-#include <thrust/detail/raw_pointer_cast.h>
-#include <thrust/iterator/iterator_traits.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-
-
-namespace
-{
-
-
-template<typename DerivedPolicy, typename Pointer>
-inline __host__ __device__
-  typename thrust::iterator_value<Pointer>::type
-    get_value_msvc2005_war(execution_policy<DerivedPolicy> &exec, Pointer ptr)
-{
-  typedef typename thrust::iterator_value<Pointer>::type result_type;
-
-  // XXX war nvbugs/881631
-  struct war_nvbugs_881631
-  {
-    __host__ inline static result_type host_path(execution_policy<DerivedPolicy> &exec, Pointer ptr)
-    {
-      // when called from host code, implement with assign_value
-      // note that this requires a type with default constructor
-      result_type result;
-
-      thrust::host_system_tag host_tag;
-      cross_system<thrust::host_system_tag, DerivedPolicy> systems(host_tag, exec);
-      assign_value(systems, &result, ptr);
-
-      return result;
-    }
-
-    __device__ inline static result_type device_path(execution_policy<DerivedPolicy> &, Pointer ptr)
-    {
-      // when called from device code, just do simple deref
-      return *thrust::raw_pointer_cast(ptr);
-    }
-  };
-
-#ifndef __CUDA_ARCH__
-  return war_nvbugs_881631::host_path(exec, ptr);
-#else
-  return war_nvbugs_881631::device_path(exec, ptr);
-#endif // __CUDA_ARCH__
-} // end get_value_msvc2005_war()
-
-
-} // end anon namespace
-
-
-template<typename DerivedPolicy, typename Pointer>
-inline __host__ __device__
-  typename thrust::iterator_value<Pointer>::type
-    get_value(execution_policy<DerivedPolicy> &exec, Pointer ptr)
-{
-  return get_value_msvc2005_war(exec,ptr);
-} // end get_value()
-
-
-} // end detail
-} // end cuda
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/cuda/detail/guarded_cuda_runtime_api.h b/compat/thrust/system/cuda/detail/guarded_cuda_runtime_api.h
deleted file mode 100644
index e6c0d2812e..0000000000
--- a/compat/thrust/system/cuda/detail/guarded_cuda_runtime_api.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// the purpose of this header is to check for the existence of macros
-// such as __host__ and __device__, which may already be defined by thrust
-// and to undefine them before entering cuda_runtime_api.h (which will redefine them)
-
-// we only try to do this stuff if cuda/include/host_defines.h has been included
-#if !defined(__HOST_DEFINES_H__)
-
-#ifdef __host__
-#undef __host__
-#endif // __host__
-
-#ifdef __device__
-#undef __device__
-#endif // __device__
-
-#endif // __HOST_DEFINES_H__
-
-#include <cuda_runtime_api.h>
-
diff --git a/compat/thrust/system/cuda/detail/inner_product.h b/compat/thrust/system/cuda/detail/inner_product.h
deleted file mode 100644
index a307fc5fc8..0000000000
--- a/compat/thrust/system/cuda/detail/inner_product.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system has no special version of this algorithm 
-
diff --git a/compat/thrust/system/cuda/detail/iter_swap.h b/compat/thrust/system/cuda/detail/iter_swap.h
deleted file mode 100644
index 9b2bcf069f..0000000000
--- a/compat/thrust/system/cuda/detail/iter_swap.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/raw_pointer_cast.h>
-#include <thrust/swap.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-
-
-template<typename Pointer1, typename Pointer2>
-inline __host__ __device__
-void iter_swap(tag, Pointer1 a, Pointer2 b)
-{
-  // XXX war nvbugs/881631
-  struct war_nvbugs_881631
-  {
-    __host__ inline static void host_path(Pointer1 a, Pointer2 b)
-    {
-      thrust::swap_ranges(a, a + 1, b);
-    }
-
-    __device__ inline static void device_path(Pointer1 a, Pointer2 b)
-    {
-      using thrust::swap;
-      swap(*thrust::raw_pointer_cast(a),
-           *thrust::raw_pointer_cast(b));
-    }
-  };
-
-#ifndef __CUDA_ARCH__
-  return war_nvbugs_881631::host_path(a,b);
-#else
-  return war_nvbugs_881631::device_path(a,b);
-#endif // __CUDA_ARCH__
-} // end iter_swap()
-
-
-} // end detail
-} // end cuda
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/cuda/detail/logical.h b/compat/thrust/system/cuda/detail/logical.h
deleted file mode 100644
index a307fc5fc8..0000000000
--- a/compat/thrust/system/cuda/detail/logical.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system has no special version of this algorithm 
-
diff --git a/compat/thrust/system/cuda/detail/malloc_and_free.h b/compat/thrust/system/cuda/detail/malloc_and_free.h
deleted file mode 100644
index 676dd7cd5a..0000000000
--- a/compat/thrust/system/cuda/detail/malloc_and_free.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cuda/detail/execution_policy.h>
-#include <thrust/detail/raw_pointer_cast.h>
-#include <thrust/system/cuda/detail/guarded_cuda_runtime_api.h>
-#include <thrust/system/system_error.h>
-#include <thrust/system/cuda/error.h>
-#include <thrust/system/detail/bad_alloc.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-
-
-// note that malloc returns a raw pointer to avoid
-// depending on the heavyweight thrust/system/cuda/memory.h header
-template<typename DerivedPolicy>
-  void *malloc(execution_policy<DerivedPolicy> &, std::size_t n)
-{
-  void *result = 0;
-
-  cudaError_t error = cudaMalloc(reinterpret_cast<void**>(&result), n);
-
-  if(error)
-  {
-    throw thrust::system::detail::bad_alloc(thrust::cuda_category().message(error).c_str());
-  } // end if
-
-  return result;
-} // end malloc()
-
-
-template<typename DerivedPolicy, typename Pointer>
-  void free(execution_policy<DerivedPolicy> &, Pointer ptr)
-{
-  cudaError_t error = cudaFree(thrust::raw_pointer_cast(ptr));
-
-  if(error)
-  {
-    throw thrust::system_error(error, thrust::cuda_category());
-  } // end error
-} // end free()
-
-
-} // end detail
-} // end cuda
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/cuda/detail/memory.inl b/compat/thrust/system/cuda/detail/memory.inl
deleted file mode 100644
index 998b54e345..0000000000
--- a/compat/thrust/system/cuda/detail/memory.inl
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cuda/memory.h>
-#include <thrust/system/cuda/detail/malloc_and_free.h>
-#include <limits>
-
-namespace thrust
-{
-
-// XXX WAR an issue with MSVC 2005 (cl v14.00) incorrectly implementing
-//     pointer_raw_pointer for pointer by specializing it here
-//     note that we specialize it here, before the use of raw_pointer_cast
-//     below, which causes pointer_raw_pointer's instantiation
-#if (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC) && (_MSC_VER <= 1400)
-namespace detail
-{
-
-template<typename T>
-  struct pointer_raw_pointer< thrust::cuda::pointer<T> >
-{
-  typedef typename thrust::cuda::pointer<T>::raw_pointer type;
-}; // end pointer_raw_pointer
-
-} // end detail
-#endif
-
-namespace system
-{
-namespace cuda
-{
-
-
-template<typename T>
-  template<typename OtherT>
-    reference<T> &
-      reference<T>
-        ::operator=(const reference<OtherT> &other)
-{
-  return super_t::operator=(other);
-} // end reference::operator=()
-
-template<typename T>
-  reference<T> &
-    reference<T>
-      ::operator=(const value_type &x)
-{
-  return super_t::operator=(x);
-} // end reference::operator=()
-
-template<typename T>
-__host__ __device__
-void swap(reference<T> a, reference<T> b)
-{
-  a.swap(b);
-} // end swap()
-
-pointer<void> malloc(std::size_t n)
-{
-  tag cuda_tag;
-  return pointer<void>(thrust::system::cuda::detail::malloc(cuda_tag, n));
-} // end malloc()
-
-template<typename T>
-pointer<T> malloc(std::size_t n)
-{
-  pointer<void> raw_ptr = thrust::system::cuda::malloc(sizeof(T) * n);
-  return pointer<T>(reinterpret_cast<T*>(raw_ptr.get()));
-} // end malloc()
-
-void free(pointer<void> ptr)
-{
-  tag cuda_tag;
-  return thrust::system::cuda::detail::free(cuda_tag, ptr.get());
-} // end free()
-
-} // end cuda
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/cuda/detail/merge.h b/compat/thrust/system/cuda/detail/merge.h
deleted file mode 100644
index e01b705470..0000000000
--- a/compat/thrust/system/cuda/detail/merge.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cuda/detail/execution_policy.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename RandomAccessIterator3,
-         typename StrictWeakOrdering>
-  RandomAccessIterator3 merge(execution_policy<DerivedPolicy> &exec,
-                              RandomAccessIterator1 first1,
-                              RandomAccessIterator1 last1,
-                              RandomAccessIterator2 first2,
-                              RandomAccessIterator2 last2,
-                              RandomAccessIterator3 result,
-                              StrictWeakOrdering comp);
-
-} // end detail
-} // end cuda
-} // end system
-} // end thrust
-
-#include <thrust/system/cuda/detail/merge.inl>
-
diff --git a/compat/thrust/system/cuda/detail/merge.inl b/compat/thrust/system/cuda/detail/merge.inl
deleted file mode 100644
index bf7516fde1..0000000000
--- a/compat/thrust/system/cuda/detail/merge.inl
+++ /dev/null
@@ -1,285 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cuda/detail/merge.h>
-#include <thrust/pair.h>
-#include <thrust/tuple.h>
-#include <thrust/detail/minmax.h>
-#include <thrust/detail/function.h>
-#include <thrust/system/cuda/detail/detail/uninitialized.h>
-#include <thrust/system/cuda/detail/detail/launch_closure.h>
-#include <thrust/detail/util/blocking.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-namespace merge_detail
-{
-
-
-template<typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename Size,
-         typename Compare>
-__device__ __thrust_forceinline__
-thrust::pair<Size,Size>
-  partition_search(RandomAccessIterator1 first1,
-                   RandomAccessIterator2 first2,
-                   Size diag,
-                   Size lower_bound1,
-                   Size upper_bound1,
-                   Size lower_bound2,
-                   Size upper_bound2,
-                   Compare comp)
-{
-  Size begin = thrust::max<Size>(lower_bound1, diag - upper_bound2);
-  Size end   = thrust::min<Size>(diag - lower_bound2, upper_bound1);
-
-  while(begin < end)
-  {
-    Size mid = (begin + end) / 2;
-    Size index1 = mid;
-    Size index2 = diag - mid - 1;
-
-    if(comp(first2[index2], first1[index1]))
-    {
-      end = mid;
-    }
-    else
-    {
-      begin = mid + 1;
-    }
-  }
-
-  return thrust::make_pair(begin, diag - begin);
-}
-
-
-template<typename Context, typename RandomAccessIterator1, typename Size, typename RandomAccessIterator2, typename RandomAccessIterator3, typename Compare>
-__device__ __thrust_forceinline__
-void merge_n(Context &ctx,
-             RandomAccessIterator1 first1,
-             Size n1,
-             RandomAccessIterator2 first2,
-             Size n2,
-             RandomAccessIterator3 result,
-             Compare comp_,
-             unsigned int work_per_thread)
-{
-  const unsigned int block_size = ctx.block_dimension();
-  thrust::detail::device_function<Compare,bool> comp(comp_);
-  typedef typename thrust::iterator_value<RandomAccessIterator1>::type value_type1;
-  typedef typename thrust::iterator_value<RandomAccessIterator2>::type value_type2;
-
-  Size result_size = n1 + n2;
-
-  // this is just oversubscription_rate * block_size * work_per_thread
-  // but it makes no sense to send oversubscription_rate as an extra parameter
-  Size work_per_block = thrust::detail::util::divide_ri(result_size, ctx.grid_dimension());
-
-  using thrust::system::cuda::detail::detail::uninitialized;
-  __shared__ uninitialized<thrust::pair<Size,Size> > s_block_input_begin;
-
-  Size result_block_offset = ctx.block_index() * work_per_block;
-
-  // find where this block's input begins in both input sequences
-  if(ctx.thread_index() == 0)
-  {
-    s_block_input_begin = (ctx.block_index() == 0) ?
-      thrust::pair<Size,Size>(0,0) :
-      partition_search(first1, first2,
-                       result_block_offset,
-                       Size(0), n1,
-                       Size(0), n2,
-                       comp);
-  }
-
-  ctx.barrier();
-
-  // iterate to consume this block's input
-  Size work_per_iteration = block_size * work_per_thread;
-  thrust::pair<Size,Size> block_input_end = s_block_input_begin;
-  block_input_end.first  += work_per_iteration;
-  block_input_end.second += work_per_iteration;
-  Size result_block_offset_last = result_block_offset + thrust::min<Size>(work_per_block, result_size - result_block_offset);
-
-  for(;
-      result_block_offset < result_block_offset_last;
-      result_block_offset += work_per_iteration,
-      block_input_end.first  += work_per_iteration,
-      block_input_end.second += work_per_iteration
-     )
-  {
-    // find where this thread's input begins in both input sequences for this iteration
-    thrust::pair<Size,Size> thread_input_begin =
-      partition_search(first1, first2,
-                       Size(result_block_offset + ctx.thread_index() * work_per_thread),
-                       s_block_input_begin.get().first,  thrust::min<Size>(block_input_end.first , n1),
-                       s_block_input_begin.get().second, thrust::min<Size>(block_input_end.second, n2),
-                       comp);
-
-    ctx.barrier();
-
-    // XXX the performance impact of not keeping x1 & x2
-    //     in registers is about 10% for int32
-    uninitialized<value_type1> x1;
-    uninitialized<value_type2> x2;
-
-    // XXX this is just a serial merge -- try to simplify or abstract this loop
-    Size i = result_block_offset + ctx.thread_index() * work_per_thread;
-    Size last_i = i + thrust::min<Size>(work_per_thread, result_size - thread_input_begin.first - thread_input_begin.second);
-    for(;
-        i < last_i;
-        ++i)
-    {
-      // optionally load x1 & x2
-      bool output_x2 = true;
-      if(thread_input_begin.second < n2)
-      {
-        x2 = first2[thread_input_begin.second];
-      }
-      else
-      {
-        output_x2 = false;
-      }
-
-      if(thread_input_begin.first < n1)
-      {
-        x1 = first1[thread_input_begin.first];
-
-        if(output_x2)
-        {
-          output_x2 = comp(x2.get(), x1.get());
-        }
-      }
-
-      result[i] = output_x2 ? x2.get() : x1.get();
-
-      if(output_x2)
-      {
-        ++thread_input_begin.second;
-      }
-      else
-      {
-        ++thread_input_begin.first;
-      }
-    } // end for
-
-    // the block's last thread has conveniently located the
-    // beginning of the next iteration's input
-    if(ctx.thread_index() == block_size-1)
-    {
-      s_block_input_begin = thread_input_begin;
-    }
-    ctx.barrier();
-  } // end for
-} // end merge_n
-
-
-template<typename RandomAccessIterator1, typename Size, typename RandomAccessIterator2, typename RandomAccessIterator3, typename Compare>
-  struct merge_n_closure
-{
-  typedef thrust::system::cuda::detail::detail::blocked_thread_array context_type;
-
-  RandomAccessIterator1 first1;
-  Size n1;
-  RandomAccessIterator2 first2;
-  Size n2;
-  RandomAccessIterator3 result;
-  Compare comp;
-  Size work_per_thread;
-
-  merge_n_closure(RandomAccessIterator1 first1, Size n1, RandomAccessIterator2 first2, Size n2, RandomAccessIterator3 result, Compare comp, Size work_per_thread)
-    : first1(first1), n1(n1), first2(first2), n2(n2), result(result), comp(comp), work_per_thread(work_per_thread)
-  {}
-
-  __device__ __forceinline__
-  void operator()()
-  {
-    context_type ctx;
-    merge_n(ctx, first1, n1, first2, n2, result, comp, work_per_thread);
-  }
-};
-
-
-// returns (work_per_thread, threads_per_block, oversubscription_factor)
-template<typename RandomAccessIterator1, typename RandomAccessIterator2, typename RandomAccessIterator3, typename Compare>
-  thrust::tuple<unsigned int,unsigned int,unsigned int>
-    tunables(RandomAccessIterator1, RandomAccessIterator1, RandomAccessIterator2, RandomAccessIterator2, RandomAccessIterator3, Compare comp)
-{
-  // determined by empirical testing on GTX 480
-  // ~4500 Mkeys/s on GTX 480
-  const unsigned int work_per_thread         = 5;
-  const unsigned int threads_per_block       = 128;
-  const unsigned int oversubscription_factor = 30;
-
-  return thrust::make_tuple(work_per_thread, threads_per_block, oversubscription_factor);
-}
-
-
-} // end merge_detail
-
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2, 
-	 typename RandomAccessIterator3,
-         typename Compare>
-RandomAccessIterator3 merge(execution_policy<DerivedPolicy> &exec,
-                            RandomAccessIterator1 first1,
-                            RandomAccessIterator1 last1,
-                            RandomAccessIterator2 first2,
-                            RandomAccessIterator2 last2,
-                            RandomAccessIterator3 result,
-                            Compare comp)
-{
-  typedef typename thrust::iterator_difference<RandomAccessIterator1>::type Size;
-  Size n1 = last1 - first1;
-  Size n2 = last2 - first2;
-  typename thrust::iterator_difference<RandomAccessIterator1>::type n = n1 + n2;
-
-  // empty result
-  if(n <= 0) return result;
-
-  unsigned int work_per_thread = 0, threads_per_block = 0, oversubscription_factor = 0;
-  thrust::tie(work_per_thread,threads_per_block,oversubscription_factor)
-    = merge_detail::tunables(first1, last1, first2, last2, result, comp);
-
-  const unsigned int work_per_block = work_per_thread * threads_per_block;
-
-  const unsigned int num_processors = device_properties().multiProcessorCount;
-  const unsigned int num_blocks = thrust::min<int>(oversubscription_factor * num_processors, thrust::detail::util::divide_ri(n, work_per_block));
-
-  typedef merge_detail::merge_n_closure<RandomAccessIterator1,Size,RandomAccessIterator2,RandomAccessIterator3,Compare> closure_type;
-  closure_type closure(first1, n1, first2, n2, result, comp, work_per_thread);
-
-  detail::launch_closure(closure, num_blocks, threads_per_block);
-
-  return result + n1 + n2;
-} // end merge()
-
-
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cuda/detail/mismatch.h b/compat/thrust/system/cuda/detail/mismatch.h
deleted file mode 100644
index a307fc5fc8..0000000000
--- a/compat/thrust/system/cuda/detail/mismatch.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system has no special version of this algorithm 
-
diff --git a/compat/thrust/system/cuda/detail/par.h b/compat/thrust/system/cuda/detail/par.h
deleted file mode 100644
index e56128c1d0..0000000000
--- a/compat/thrust/system/cuda/detail/par.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cuda/detail/execution_policy.h>
-#include <thrust/detail/execute_with_allocator.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-
-
-struct par_t : thrust::system::cuda::detail::execution_policy<par_t>
-{
-  par_t() : thrust::system::cuda::detail::execution_policy<par_t>() {}
-
-  template<typename Allocator>
-    thrust::detail::execute_with_allocator<Allocator, thrust::system::cuda::detail::execution_policy>
-      operator()(Allocator &alloc) const
-  {
-    return thrust::detail::execute_with_allocator<Allocator, thrust::system::cuda::detail::execution_policy>(alloc);
-  }
-};
-
-
-} // end detail
-
-
-static const detail::par_t par;
-
-
-} // end cuda
-} // end system
-
-
-// alias par here
-namespace cuda
-{
-
-
-using thrust::system::cuda::par;
-
-
-} // end cuda
-} // end thrust
-
diff --git a/compat/thrust/system/cuda/detail/partition.h b/compat/thrust/system/cuda/detail/partition.h
deleted file mode 100644
index a307fc5fc8..0000000000
--- a/compat/thrust/system/cuda/detail/partition.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system has no special version of this algorithm 
-
diff --git a/compat/thrust/system/cuda/detail/reduce.h b/compat/thrust/system/cuda/detail/reduce.h
deleted file mode 100644
index d188f60f25..0000000000
--- a/compat/thrust/system/cuda/detail/reduce.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file reduce.h
- *  \brief Reduce a sequence of elements with a given length.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cuda/detail/execution_policy.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputType,
-         typename BinaryFunction>
-  OutputType reduce(execution_policy<DerivedPolicy> &exec,
-                    InputIterator first,
-                    InputIterator last,
-                    OutputType init,
-                    BinaryFunction binary_op);
-
-
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/cuda/detail/reduce.inl>
-
diff --git a/compat/thrust/system/cuda/detail/reduce.inl b/compat/thrust/system/cuda/detail/reduce.inl
deleted file mode 100644
index 66b4ac72c2..0000000000
--- a/compat/thrust/system/cuda/detail/reduce.inl
+++ /dev/null
@@ -1,275 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file reduce.inl
- *  \brief Inline file for reduce.h
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/distance.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/detail/minmax.h>
-#include <thrust/detail/temporary_array.h>
-#include <thrust/system/detail/generic/select_system.h>
-
-#include <thrust/system/cuda/detail/runtime_introspection.h>
-#include <thrust/system/cuda/detail/extern_shared_ptr.h>
-#include <thrust/system/cuda/detail/block/reduce.h>
-#include <thrust/system/cuda/detail/detail/launch_closure.h>
-#include <thrust/system/cuda/detail/detail/launch_calculator.h>
-#include <thrust/system/cuda/detail/execution_policy.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-
-namespace reduce_detail
-{
-
-/*
- * Reduce a vector of n elements using binary_op()
- *
- * The order of reduction is not defined, so binary_op() should
- * be a commutative (and associative) operator such as 
- * (integer) addition.  Since floating point operations
- * do not completely satisfy these criteria, the result is 
- * generally not the same as a consecutive reduction of 
- * the elements.
- * 
- * Uses the same pattern as reduce6() in the CUDA SDK
- *
- */
-template <typename InputIterator,
-          typename Size,
-          typename T,
-          typename OutputIterator,
-          typename BinaryFunction,
-          typename Context>
-struct unordered_reduce_closure
-{
-  InputIterator  input;
-  Size           n;
-  T              init;
-  OutputIterator output;
-  BinaryFunction binary_op;
-  unsigned int shared_array_size;
-
-  typedef Context context_type;
-  context_type context;
-
-  unordered_reduce_closure(InputIterator input, Size n, T init, OutputIterator output, BinaryFunction binary_op, unsigned int shared_array_size, Context context = Context())
-    : input(input), n(n), init(init), output(output), binary_op(binary_op), shared_array_size(shared_array_size), context(context) {}
-
-  __device__ __thrust_forceinline__
-  void operator()(void)
-  {
-    typedef typename thrust::iterator_value<OutputIterator>::type OutputType;
-    extern_shared_ptr<OutputType>  shared_array;
-
-    Size grid_size = context.block_dimension() * context.grid_dimension();
-
-    Size i = context.linear_index();
-      
-    input += i;
-
-    // compute reduction with all blockDim.x threads
-    OutputType sum = thrust::raw_reference_cast(*input);
-
-    i     += grid_size;
-    input += grid_size;
-
-    while (i < n)
-    {
-      OutputType val = thrust::raw_reference_cast(*input);
-
-      sum = binary_op(sum, val);
-
-      i      += grid_size;
-      input  += grid_size;
-    }
-
-    // write first shared_array_size values into shared memory
-    if (context.thread_index() < shared_array_size)
-      shared_array[context.thread_index()] = sum;  
-
-    // accumulate remaining values (if any) to shared memory in stages
-    if (context.block_dimension() > shared_array_size)
-    {
-      unsigned int lb = shared_array_size;
-      unsigned int ub = shared_array_size + lb;
-      
-      while (lb < context.block_dimension())
-      {
-        context.barrier();
-
-        if (lb <= context.thread_index() && context.thread_index() < ub)
-        {
-          OutputType tmp = shared_array[context.thread_index() - lb];
-          shared_array[context.thread_index() - lb] = binary_op(tmp, sum);
-        }
-
-        lb += shared_array_size;
-        ub += shared_array_size;
-      }
-    }
-    
-    context.barrier();
-
-    block::reduce_n(context, shared_array, thrust::min<unsigned int>(context.block_dimension(), shared_array_size), binary_op);
-  
-    if (context.thread_index() == 0)
-    {
-      OutputType tmp = shared_array[0];
-
-      if (context.grid_dimension() == 1)
-        tmp = binary_op(init, tmp);
-
-      output += context.block_index();
-      *output = tmp;
-    }
-  }
-};
-
-
-__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_BEGIN
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputType,
-         typename BinaryFunction>
-  OutputType reduce(execution_policy<DerivedPolicy> &exec,
-                    InputIterator first,
-                    InputIterator last,
-                    OutputType init,
-                    BinaryFunction binary_op)
-{
-  // we're attempting to launch a kernel, assert we're compiling with nvcc
-  // ========================================================================
-  // X Note to the user: If you've found this line due to a compiler error, X
-  // X you need to compile your code using nvcc, rather than g++ or cl.exe  X
-  // ========================================================================
-  THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation<InputIterator, THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC>::value) );
-
-  typedef typename thrust::iterator_difference<InputIterator>::type difference_type;
-
-  difference_type n = thrust::distance(first,last);
-
-  if (n == 0)
-    return init;
-
-  typedef thrust::detail::temporary_array<OutputType, DerivedPolicy> OutputArray;
-  typedef typename OutputArray::iterator OutputIterator;
-
-  typedef detail::blocked_thread_array Context;
-  typedef unordered_reduce_closure<InputIterator,difference_type,OutputType,OutputIterator,BinaryFunction,Context> Closure;
-    
-  function_attributes_t attributes = detail::closure_attributes<Closure>();
-  
-  // TODO chose this in a more principled manner
-  size_t threshold = thrust::max<size_t>(2 * attributes.maxThreadsPerBlock, 1024);
-
-  device_properties_t properties = device_properties();
-
-  // launch configuration
-  size_t num_blocks; 
-  size_t block_size; 
-  size_t array_size; 
-  size_t smem_bytes; 
-
-  // first level reduction
-  if (static_cast<size_t>(n) < threshold)
-  {
-    num_blocks = 1;
-    block_size = thrust::min(static_cast<size_t>(n), static_cast<size_t>(attributes.maxThreadsPerBlock));
-    array_size = thrust::min(block_size, (properties.sharedMemPerBlock - attributes.sharedSizeBytes) / sizeof(OutputType));
-    smem_bytes = sizeof(OutputType) * array_size;
-  }
-  else
-  {
-    detail::launch_calculator<Closure> calculator;
-    
-    thrust::tuple<size_t,size_t,size_t> config = calculator.with_variable_block_size_available_smem();
-
-    num_blocks = thrust::min(thrust::get<0>(config), static_cast<size_t>(n) / thrust::get<1>(config));
-    block_size = thrust::get<1>(config);
-    array_size = thrust::min(block_size, thrust::get<2>(config) / sizeof(OutputType));
-    smem_bytes = sizeof(OutputType) * array_size;
-  }
- 
-  // TODO assert(n <= num_blocks * block_size);
-  // TODO if (shared_array_size < 1) throw cuda exception "insufficient shared memory"
-
-  OutputArray output(exec, num_blocks);
-
-  Closure closure(first, n, init, output.begin(), binary_op, array_size);
-  
-  //std::cout << "Launching " << num_blocks << " blocks of kernel with " << block_size << " threads and " << smem_bytes << " shared memory per block " << std::endl;
-
-  detail::launch_closure(closure, num_blocks, block_size, smem_bytes);
-
-  // second level reduction
-  if (num_blocks > 1)
-  {
-    typedef detail::blocked_thread_array Context;
-    typedef unordered_reduce_closure<OutputIterator,difference_type,OutputType,OutputIterator,BinaryFunction,Context> Closure;
-
-    function_attributes_t attributes = detail::closure_attributes<Closure>();
-
-    num_blocks = 1;
-    block_size = thrust::min(output.size(), static_cast<size_t>(attributes.maxThreadsPerBlock));
-    array_size = thrust::min(block_size, (properties.sharedMemPerBlock - attributes.sharedSizeBytes) / sizeof(OutputType));
-    smem_bytes = sizeof(OutputType) * array_size;
-  
-    // TODO if (shared_array_size < 1) throw cuda exception "insufficient shared memory"
-
-    Closure closure(output.begin(), output.size(), init, output.begin(), binary_op, array_size);
-
-    //std::cout << "Launching " << num_blocks << " blocks of kernel with " << block_size << " threads and " << smem_bytes << " shared memory per block " << std::endl;
-
-    detail::launch_closure(closure, num_blocks, block_size, smem_bytes);
-  }
-  
-  return output[0];
-} // end reduce
-
-} // end reduce_detail
-
-__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_END
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputType,
-         typename BinaryFunction>
-  OutputType reduce(execution_policy<DerivedPolicy> &exec,
-                    InputIterator first,
-                    InputIterator last,
-                    OutputType init,
-                    BinaryFunction binary_op)
-{
-  return reduce_detail::reduce(exec, first, last, init, binary_op);
-} // end reduce()
-
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cuda/detail/reduce_by_key.h b/compat/thrust/system/cuda/detail/reduce_by_key.h
deleted file mode 100644
index 9b8ec10936..0000000000
--- a/compat/thrust/system/cuda/detail/reduce_by_key.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file reduce_by_key.h
- *  \brief CUDA implementation of reduce_by_key
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cuda/detail/execution_policy.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-
-template <typename DerivedPolicy,
-          typename InputIterator1,
-          typename InputIterator2,
-          typename OutputIterator1,
-          typename OutputIterator2,
-          typename BinaryPredicate,
-          typename BinaryFunction>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    reduce_by_key(execution_policy<DerivedPolicy> &exec,
-                  InputIterator1 keys_first, 
-                  InputIterator1 keys_last,
-                  InputIterator2 values_first,
-                  OutputIterator1 keys_output,
-                  OutputIterator2 values_output,
-                  BinaryPredicate binary_pred,
-                  BinaryFunction binary_op);
-
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/cuda/detail/reduce_by_key.inl>
-
diff --git a/compat/thrust/system/cuda/detail/reduce_by_key.inl b/compat/thrust/system/cuda/detail/reduce_by_key.inl
deleted file mode 100644
index 18dc1e4994..0000000000
--- a/compat/thrust/system/cuda/detail/reduce_by_key.inl
+++ /dev/null
@@ -1,705 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#include <thrust/detail/config.h>
-
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/iterator/counting_iterator.h>
-#include <thrust/iterator/transform_iterator.h>
-#include <thrust/iterator/permutation_iterator.h>
-#include <thrust/iterator/zip_iterator.h>
-#include <thrust/system/detail/generic/select_system.h>
-
-#include <thrust/detail/type_traits.h>
-#include <thrust/detail/type_traits/function_traits.h>
-#include <thrust/detail/type_traits/iterator/is_output_iterator.h>
-#include <thrust/detail/type_traits/iterator/is_discard_iterator.h>
-
-#include <thrust/detail/minmax.h>
-#include <thrust/detail/internal_functional.h>
-#include <thrust/detail/temporary_array.h>
-
-#include <thrust/reduce.h>
-#include <thrust/scan.h>
-#include <thrust/system/cuda/detail/default_decomposition.h>
-#include <thrust/system/cuda/detail/block/inclusive_scan.h>
-#include <thrust/system/cuda/detail/execution_policy.h>
-#include <thrust/system/cuda/detail/detail/launch_closure.h>
-#include <thrust/system/cuda/detail/reduce_intervals.h>
-#include <thrust/system/cuda/detail/detail/uninitialized.h>
-
-__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_BEGIN
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-namespace reduce_by_key_detail
-{
-
-template <typename FlagType, typename IndexType, typename KeyType, typename BinaryPredicate>
-struct tail_flag_functor
-{
-  BinaryPredicate binary_pred; // NB: this must be the first member for performance reasons
-  IndexType n;
-
-  typedef FlagType result_type;
-  
-  tail_flag_functor(IndexType n, BinaryPredicate binary_pred)
-    : n(n), binary_pred(binary_pred)
-  {}
-  
-  // XXX why is this noticably faster?  (it may read past the end of input)
-  //FlagType operator()(const thrust::tuple<IndexType,KeyType,KeyType>& t) const
-  
-  template <typename Tuple>
-  __host__ __device__ __thrust_forceinline__
-  FlagType operator()(const Tuple& t)
-  {
-    if (thrust::get<0>(t) == (n - 1) || !binary_pred(thrust::get<1>(t), thrust::get<2>(t)))
-      return 1;
-    else
-      return 0;
-  }
-};
-
-
-template <unsigned int CTA_SIZE,
-          unsigned int K,
-          bool FullBlock,
-          typename Context,
-          typename FlagIterator,
-          typename FlagType>
-__device__ __thrust_forceinline__
-FlagType load_flags(Context context,
-                    const unsigned int n,
-                    FlagIterator iflags,
-                    FlagType  (&sflag)[CTA_SIZE])
-{
-  FlagType flag_bits = 0;
-
-  // load flags in unordered fashion
-  for(unsigned int k = 0; k < K; k++)
-  {
-    const unsigned int offset = k*CTA_SIZE + context.thread_index();
-
-    if (FullBlock || offset < n)
-    {
-      FlagIterator temp = iflags + offset;
-      if (*temp)
-        flag_bits |= FlagType(1) << k;
-    }
-  }
-
-  sflag[context.thread_index()] = flag_bits;
-  
-  context.barrier();
-
-  flag_bits = 0;
-
-  // obtain flags for iflags[K * context.thread_index(), K * context.thread_index() + K)
-  for(unsigned int k = 0; k < K; k++)
-  {
-    const unsigned int offset = K * context.thread_index() + k;
-
-    if (FullBlock || offset < n)
-    {
-      flag_bits |= ((sflag[offset % CTA_SIZE] >> (offset / CTA_SIZE)) & FlagType(1)) << k;
-    }
-  }
-
-  context.barrier();
-  
-  sflag[context.thread_index()] = flag_bits;
-  
-  context.barrier();
-
-  return flag_bits;
-}
-
-template <unsigned int CTA_SIZE,
-          unsigned int K,
-          bool FullBlock,
-          typename Context,
-          typename InputIterator2,
-          typename ValueType>
-__device__ __thrust_forceinline__
-void load_values(Context context,
-                 const unsigned int n,
-                 InputIterator2 ivals,
-                 ValueType (&sdata)[K][CTA_SIZE + 1])
-{
-  for(unsigned int k = 0; k < K; k++)
-  {
-    const unsigned int offset = k*CTA_SIZE + context.thread_index();
-
-    if (FullBlock || offset < n)
-    {
-      InputIterator2 temp = ivals + offset;
-      sdata[offset % K][offset / K] = *temp;
-    }
-  }
-
-  context.barrier();
-}
-
-
-template <unsigned int CTA_SIZE,
-          unsigned int K,
-          bool FullBlock,
-          typename Context,
-          typename InputIterator1,
-          typename InputIterator2,
-          typename OutputIterator1,
-          typename OutputIterator2,
-          typename BinaryPredicate,
-          typename BinaryFunction,
-          typename FlagIterator,
-          typename FlagType,
-          typename IndexType,
-          typename ValueType>
-__device__ __thrust_forceinline__
-void reduce_by_key_body(Context context,
-                        const unsigned int n,
-                        InputIterator1   ikeys,
-                        InputIterator2   ivals,
-                        OutputIterator1  okeys,
-                        OutputIterator2  ovals,
-                        BinaryPredicate  binary_pred,
-                        BinaryFunction   binary_op,
-                        FlagIterator     iflags,
-                        FlagType  (&sflag)[CTA_SIZE],
-                        ValueType (&sdata)[K][CTA_SIZE + 1],
-                        bool&      carry_in,
-                        IndexType& carry_index,
-                        ValueType& carry_value)
-{
-  // load flags
-  const FlagType flag_bits  = load_flags<CTA_SIZE,K,FullBlock>(context, n, iflags, sflag);
-  const FlagType flag_count = __popc(flag_bits); // TODO hide this behind a template
-  const FlagType left_flag  = (context.thread_index() == 0) ? 0 : sflag[context.thread_index() - 1];
-  const FlagType head_flag  = (context.thread_index() == 0 || flag_bits & ((1 << (K - 1)) - 1) || left_flag & (1 << (K - 1))) ? 1 : 0;
-  
-  context.barrier();
-
-  // scan flag counts
-  sflag[context.thread_index()] = flag_count; context.barrier();
-
-  block::inclusive_scan(context, sflag, thrust::plus<FlagType>());
-
-  const FlagType output_position = (context.thread_index() == 0) ? 0 : sflag[context.thread_index() - 1];
-  const FlagType num_outputs     = sflag[CTA_SIZE - 1];
-
-  context.barrier();
-
-  // shuffle keys and write keys out
-  if (!thrust::detail::is_discard_iterator<OutputIterator1>::value)
-  {
-    // XXX this could be improved
-    for (unsigned int i = 0; i < num_outputs; i += CTA_SIZE)
-    {
-      FlagType position = output_position;
-
-      for(unsigned int k = 0; k < K; k++)
-      {
-        if (flag_bits & (FlagType(1) << k))
-        {
-          if (i <= position && position < i + CTA_SIZE)
-            sflag[position - i] = K * context.thread_index() + k;
-          position++;
-        }
-      }
-
-      context.barrier();
-
-      if (i + context.thread_index() < num_outputs)
-      {
-        InputIterator1  tmp1 = ikeys + sflag[context.thread_index()];
-        OutputIterator1 tmp2 = okeys + (i + context.thread_index());
-        *tmp2 = *tmp1; 
-      }
-      
-      context.barrier();
-    }
-  }
-
-  // load values
-  load_values<CTA_SIZE,K,FullBlock> (context, n, ivals, sdata);
-
-  ValueType ldata[K];
-  for (unsigned int k = 0; k < K; k++)
-      ldata[k] = sdata[k][context.thread_index()];
-
-  // carry in (if necessary)
-  if (context.thread_index() == 0 && carry_in)
-  {
-    // XXX WAR sm_10 issue
-    ValueType tmp1 = carry_value;
-    ldata[0] = binary_op(tmp1, ldata[0]);
-  }
-
-  context.barrier();
-
-  // sum local values
-  {
-    for(unsigned int k = 1; k < K; k++)
-    {
-      const unsigned int offset = K * context.thread_index() + k;
-
-      if (FullBlock || offset < n)
-      {
-        if (!(flag_bits & (FlagType(1) << (k - 1))))
-          ldata[k] = binary_op(ldata[k - 1], ldata[k]);
-      }
-    }
-  }
-
-  // second level segmented scan
-  {
-    // use head flags for segmented scan
-    sflag[context.thread_index()] = head_flag;  sdata[K - 1][context.thread_index()] = ldata[K - 1]; context.barrier();
-
-    if (FullBlock)
-      block::inclusive_scan_by_flag(context, sflag, sdata[K-1], binary_op);
-    else
-      block::inclusive_scan_by_flag_n(context, sflag, sdata[K-1], n, binary_op);
-  }
-
-  // update local values
-  if (context.thread_index() > 0)
-  {
-    unsigned int update_bits  = (flag_bits << 1) | (left_flag >> (K - 1));
-// TODO remove guard
-#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC
-    unsigned int update_count = __ffs(update_bits) - 1u; // NB: this might wrap around to UINT_MAX
-#else
-    unsigned int update_count = 0;
-#endif // THRUST_DEVICE_COMPILER_NVCC
-
-    if (!FullBlock && (K + 1) * context.thread_index() > n)
-      update_count = thrust::min(n - K * context.thread_index(), update_count);
-
-    ValueType left = sdata[K - 1][context.thread_index() - 1];
-
-    for(unsigned int k = 0; k < K; k++)
-    {
-      if (k < update_count)
-        ldata[k] = binary_op(left, ldata[k]);
-    }
-  }
-  
-  context.barrier();
-
-  // store carry out
-  if (FullBlock)
-  {
-    if (context.thread_index() == CTA_SIZE - 1)
-    {
-      carry_value = ldata[K - 1];
-      carry_in    = (flag_bits & (FlagType(1) << (K - 1))) ? false : true;
-      carry_index = num_outputs;
-    }
-  }
-  else
-  {
-    if (context.thread_index() == (n - 1) / K)
-    {
-      for (unsigned int k = 0; k < K; k++)
-          if (k == (n - 1) % K)
-              carry_value = ldata[k];
-      carry_in    = (flag_bits & (FlagType(1) << ((n - 1) % K))) ? false : true;
-      carry_index = num_outputs;
-    }
-  }
-
-  // shuffle values
-  {
-    FlagType position = output_position;
-  
-    for(unsigned int k = 0; k < K; k++)
-    {
-      const unsigned int offset = K * context.thread_index() + k;
-  
-      if (FullBlock || offset < n)
-      {
-        if (flag_bits & (FlagType(1) << k))
-        {
-          sdata[position / CTA_SIZE][position % CTA_SIZE] = ldata[k];
-          position++;
-        }
-      }
-    }
-  }
-
-  context.barrier();
-
-
-  // write values out
-  for(unsigned int k = 0; k < K; k++)
-  {
-    const unsigned int offset = CTA_SIZE * k + context.thread_index();
-
-    if (offset < num_outputs)
-    {
-      OutputIterator2 tmp = ovals + offset;
-      *tmp = sdata[k][context.thread_index()];
-    }
-  }
-
-  context.barrier();
-}
-
-template <typename InputIterator1,
-          typename InputIterator2,
-          typename OutputIterator1,
-          typename OutputIterator2,
-          typename BinaryPredicate,
-          typename BinaryFunction,
-          typename FlagIterator,
-          typename IndexIterator,
-          typename ValueIterator,
-          typename BoolIterator,
-          typename Decomposition,
-          typename Context>
-struct reduce_by_key_closure
-{
-  InputIterator1   ikeys;
-  InputIterator2   ivals;
-  OutputIterator1  okeys;
-  OutputIterator2  ovals;
-  BinaryPredicate  binary_pred;
-  BinaryFunction   binary_op;
-  FlagIterator     iflags;
-  IndexIterator    interval_counts;
-  ValueIterator    interval_values;
-  BoolIterator     interval_carry;
-  Decomposition    decomp;
-  Context          context;
-
-  typedef Context context_type;
-
-  reduce_by_key_closure(InputIterator1   ikeys,
-                        InputIterator2   ivals,
-                        OutputIterator1  okeys,
-                        OutputIterator2  ovals,
-                        BinaryPredicate  binary_pred,
-                        BinaryFunction   binary_op,
-                        FlagIterator     iflags,
-                        IndexIterator    interval_counts,
-                        ValueIterator    interval_values,
-                        BoolIterator     interval_carry,
-                        Decomposition    decomp,
-                        Context          context = Context())
-    : ikeys(ikeys), ivals(ivals), okeys(okeys), ovals(ovals), binary_pred(binary_pred), binary_op(binary_op),
-      iflags(iflags), interval_counts(interval_counts), interval_values(interval_values), interval_carry(interval_carry),
-      decomp(decomp), context(context) {}
-
-  __device__ __thrust_forceinline__
-  void operator()(void)
-  {
-    typedef typename thrust::iterator_value<InputIterator1>::type KeyType;
-    typedef typename thrust::iterator_value<ValueIterator>::type  ValueType;
-    typedef typename Decomposition::index_type                    IndexType;
-    typedef typename thrust::iterator_value<FlagIterator>::type   FlagType;
-
-    const unsigned int CTA_SIZE = context_type::ThreadsPerBlock::value;
-
-// TODO centralize this mapping (__CUDA_ARCH__ -> smem bytes)
-#if __CUDA_ARCH__ >= 200
-    const unsigned int SMEM = (48 * 1024);
-#else
-    const unsigned int SMEM = (16 * 1024) - 256;
-#endif
-    const unsigned int SMEM_FIXED = CTA_SIZE * sizeof(FlagType) + sizeof(ValueType) + sizeof(IndexType) + sizeof(bool);
-    const unsigned int BOUND_1 = (SMEM - SMEM_FIXED) / ((CTA_SIZE + 1) * sizeof(ValueType));
-    const unsigned int BOUND_2 = 8 * sizeof(FlagType);
-    const unsigned int BOUND_3 = 6;
-  
-    // TODO replace this with a static_min<BOUND_1,BOUND_2,BOUND_3>::value
-    const unsigned int K = (BOUND_1 < BOUND_2) ? (BOUND_1 < BOUND_3 ? BOUND_1 : BOUND_3) : (BOUND_2 < BOUND_3 ? BOUND_2 : BOUND_3);
-  
-    __shared__ detail::uninitialized<FlagType[CTA_SIZE]>         sflag;
-    __shared__ detail::uninitialized<ValueType[K][CTA_SIZE + 1]> sdata;  // padded to avoid bank conflicts
-  
-    __shared__ detail::uninitialized<ValueType> carry_value; // storage for carry in and carry out
-    __shared__ detail::uninitialized<IndexType> carry_index;
-    __shared__ detail::uninitialized<bool>      carry_in; 
-
-    typename Decomposition::range_type interval = decomp[context.block_index()];
-    //thrust::system::detail::internal::index_range<IndexType> interval = decomp[context.block_index()];
-  
-
-    if (context.thread_index() == 0)
-    {
-      carry_in = false; // act as though the previous segment terminated just before us
-  
-      if (context.block_index() == 0)
-      {
-        carry_index = 0;
-      }
-      else
-      {
-        interval_counts += (context.block_index() - 1);
-        carry_index = *interval_counts;
-      }
-    }
-  
-    context.barrier();
-  
-    IndexType base = interval.begin();
-  
-    // advance input and output iterators
-    ikeys  += base;
-    ivals  += base;
-    iflags += base;
-    okeys  += carry_index;
-    ovals  += carry_index;
-  
-    const unsigned int unit_size = K * CTA_SIZE;
-  
-    // process full units
-    while (base + unit_size <= interval.end())
-    {
-      const unsigned int n = unit_size;
-      reduce_by_key_body<CTA_SIZE,K,true>(context, n, ikeys, ivals, okeys, ovals, binary_pred, binary_op, iflags, sflag.get(), sdata.get(), carry_in.get(), carry_index.get(), carry_value.get());
-      base   += unit_size;
-      ikeys  += unit_size;
-      ivals  += unit_size;
-      iflags += unit_size;
-      okeys  += carry_index;
-      ovals  += carry_index;
-    }
-  
-    // process partially full unit at end of input (if necessary)
-    if (base < interval.end())
-    {
-      const unsigned int n = interval.end() - base;
-      reduce_by_key_body<CTA_SIZE,K,false>(context, n, ikeys, ivals, okeys, ovals, binary_pred, binary_op, iflags, sflag.get(), sdata.get(), carry_in.get(), carry_index.get(), carry_value.get());
-    }
-  
-    if (context.thread_index() == 0)
-    {
-      interval_values += context.block_index();
-      interval_carry  += context.block_index();
-      *interval_values = carry_value;
-      *interval_carry  = carry_in;
-    }
-  }
-}; // end reduce_by_key_closure
-
-template <typename InputIterator1,
-          typename InputIterator2,
-          typename OutputIterator1,
-          typename OutputIterator2,
-          typename BinaryPredicate,
-          typename BinaryFunction>
-struct DefaultPolicy
-{
-  // typedefs
-  typedef unsigned int                                                       FlagType;
-  typedef typename thrust::iterator_traits<InputIterator1>::difference_type  IndexType;
-  typedef typename thrust::iterator_traits<InputIterator1>::value_type       KeyType;
-  typedef thrust::system::detail::internal::uniform_decomposition<IndexType> Decomposition;
-    
-  // the pseudocode for deducing the type of the temporary used below:
-  // 
-  // if BinaryFunction is AdaptableBinaryFunction
-  //   TemporaryType = AdaptableBinaryFunction::result_type
-  // else if OutputIterator2 is a "pure" output iterator
-  //   TemporaryType = InputIterator2::value_type
-  // else
-  //   TemporaryType = OutputIterator2::value_type
-  //
-  // XXX upon c++0x, TemporaryType needs to be:
-  // result_of<BinaryFunction>::type
-
-  typedef typename thrust::detail::eval_if<
-    thrust::detail::has_result_type<BinaryFunction>::value,
-    thrust::detail::result_type<BinaryFunction>,
-    thrust::detail::eval_if<
-      thrust::detail::is_output_iterator<OutputIterator2>::value,
-      thrust::iterator_value<InputIterator2>,
-      thrust::iterator_value<OutputIterator2>
-    >
-  >::type ValueType;
- 
-  // XXX WAR problem on sm_11
-  // TODO tune this
-  const static unsigned int ThreadsPerBlock = (thrust::detail::is_pod<ValueType>::value) ? 256 : 192;
-
-  DefaultPolicy(InputIterator1 first1, InputIterator1 last1)
-    : decomp(default_decomposition<IndexType>(last1 - first1))
-  {}
-
-  // member variables
-  Decomposition decomp;
-};
-
-template <typename DerivedPolicy,
-          typename InputIterator1,
-          typename InputIterator2,
-          typename OutputIterator1,
-          typename OutputIterator2,
-          typename BinaryPredicate,
-          typename BinaryFunction,
-          typename Policy>
-  thrust::pair<OutputIterator1,OutputIterator2>
-  reduce_by_key(execution_policy<DerivedPolicy> &exec,
-                InputIterator1 keys_first, 
-                InputIterator1 keys_last,
-                InputIterator2 values_first,
-                OutputIterator1 keys_output,
-                OutputIterator2 values_output,
-                BinaryPredicate binary_pred,
-                BinaryFunction binary_op,
-                Policy policy)
-{
-    typedef typename Policy::FlagType       FlagType;
-    typedef typename Policy::Decomposition  Decomposition;
-    typedef typename Policy::IndexType      IndexType;
-    typedef typename Policy::KeyType        KeyType;
-    typedef typename Policy::ValueType      ValueType;
-
-    // temporary arrays
-    typedef thrust::detail::temporary_array<IndexType,DerivedPolicy> IndexArray;
-    typedef thrust::detail::temporary_array<KeyType,DerivedPolicy>   KeyArray;
-    typedef thrust::detail::temporary_array<ValueType,DerivedPolicy> ValueArray;
-    typedef thrust::detail::temporary_array<bool,DerivedPolicy>      BoolArray;
-
-    Decomposition decomp = policy.decomp;
-
-    // input size
-    IndexType n = keys_last - keys_first;
-
-    if (n == 0)
-      return thrust::make_pair(keys_output, values_output);
-
-    IndexArray interval_counts(exec, decomp.size());
-    ValueArray interval_values(exec, decomp.size());
-    BoolArray  interval_carry(exec, decomp.size());
-
-    // an ode to c++11 auto
-    typedef thrust::counting_iterator<IndexType> CountingIterator;
-    typedef thrust::transform_iterator<
-      tail_flag_functor<FlagType,IndexType,KeyType,BinaryPredicate>,
-      thrust::zip_iterator<
-        thrust::tuple<CountingIterator,InputIterator1,InputIterator1>
-      >
-    > FlagIterator;
-
-    FlagIterator iflag= thrust::make_transform_iterator
-       (thrust::make_zip_iterator(thrust::make_tuple(thrust::counting_iterator<IndexType>(0), keys_first, keys_first + 1)),
-        tail_flag_functor<FlagType,IndexType,KeyType,BinaryPredicate>(n, binary_pred));
-
-    // count number of tail flags per interval
-    thrust::system::cuda::detail::reduce_intervals(exec, iflag, interval_counts.begin(), thrust::plus<IndexType>(), decomp);
-
-    thrust::inclusive_scan(exec,
-                           interval_counts.begin(), interval_counts.end(),
-                           interval_counts.begin(),
-                           thrust::plus<IndexType>());
- 
-    // determine output size
-    const IndexType N = interval_counts[interval_counts.size() - 1];
-   
-    const static unsigned int ThreadsPerBlock = Policy::ThreadsPerBlock;
-    typedef typename IndexArray::iterator IndexIterator;
-    typedef typename ValueArray::iterator ValueIterator; 
-    typedef typename BoolArray::iterator  BoolIterator;  
-    typedef detail::statically_blocked_thread_array<ThreadsPerBlock> Context;
-    typedef reduce_by_key_closure<InputIterator1,InputIterator2,OutputIterator1,OutputIterator2,BinaryPredicate,BinaryFunction,
-                                  FlagIterator,IndexIterator,ValueIterator,BoolIterator,Decomposition,Context> Closure;
-    Closure closure
-      (keys_first,  values_first,
-       keys_output, values_output,
-       binary_pred, binary_op,
-       iflag,
-       interval_counts.begin(),
-       interval_values.begin(),
-       interval_carry.begin(),
-       decomp);
-    detail::launch_closure(closure, decomp.size(), ThreadsPerBlock);
-   
-    if (decomp.size() > 1)
-    {
-      ValueArray interval_values2(exec, decomp.size());
-      IndexArray interval_counts2(exec, decomp.size());
-      BoolArray  interval_carry2(exec, decomp.size());
-
-      IndexType N2 = 
-      thrust::reduce_by_key
-        (exec,
-         thrust::make_zip_iterator(thrust::make_tuple(interval_counts.begin(), interval_carry.begin())),
-         thrust::make_zip_iterator(thrust::make_tuple(interval_counts.end(),   interval_carry.end())),
-         interval_values.begin(),
-         thrust::make_zip_iterator(thrust::make_tuple(interval_counts2.begin(), interval_carry2.begin())),
-         interval_values2.begin(),
-         thrust::equal_to< thrust::tuple<IndexType,bool> >(),
-         binary_op).first
-        -
-        thrust::make_zip_iterator(thrust::make_tuple(interval_counts2.begin(), interval_carry2.begin()));
-    
-      thrust::transform_if
-        (exec,
-         interval_values2.begin(), interval_values2.begin() + N2,
-         thrust::make_permutation_iterator(values_output, interval_counts2.begin()),
-         interval_carry2.begin(),
-         thrust::make_permutation_iterator(values_output, interval_counts2.begin()),
-         binary_op,
-         thrust::identity<bool>());
-    }
-  
-    return thrust::make_pair(keys_output + N, values_output + N); 
-}
-
-} // end namespace reduce_by_key_detail
-
-
-template <typename DerivedPolicy,
-          typename InputIterator1,
-          typename InputIterator2,
-          typename OutputIterator1,
-          typename OutputIterator2,
-          typename BinaryPredicate,
-          typename BinaryFunction>
-  thrust::pair<OutputIterator1,OutputIterator2>
-  reduce_by_key(execution_policy<DerivedPolicy> &exec,
-                InputIterator1 keys_first, 
-                InputIterator1 keys_last,
-                InputIterator2 values_first,
-                OutputIterator1 keys_output,
-                OutputIterator2 values_output,
-                BinaryPredicate binary_pred,
-                BinaryFunction binary_op)
-{
-  return reduce_by_key_detail::reduce_by_key
-    (exec, 
-     keys_first, keys_last, values_first, keys_output, values_output, binary_pred, binary_op,
-     reduce_by_key_detail::DefaultPolicy<InputIterator1,InputIterator2,OutputIterator1,OutputIterator2,BinaryPredicate,BinaryFunction>(keys_first, keys_last));
-} // end reduce_by_key()
-
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
-__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_END
-
diff --git a/compat/thrust/system/cuda/detail/reduce_intervals.h b/compat/thrust/system/cuda/detail/reduce_intervals.h
deleted file mode 100644
index 505d13635c..0000000000
--- a/compat/thrust/system/cuda/detail/reduce_intervals.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file reduce_intervals.h
- *  \brief CUDA implementations of reduce_intervals algorithms.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cuda/detail/execution_policy.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-
-template <typename DerivedPolicy,
-          typename InputIterator,
-          typename OutputIterator,
-          typename BinaryFunction,
-          typename Decomposition>
-void reduce_intervals(execution_policy<DerivedPolicy> &exec,
-                      InputIterator input,
-                      OutputIterator output,
-                      BinaryFunction binary_op,
-                      Decomposition decomp);
-
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/cuda/detail/reduce_intervals.inl>
-
diff --git a/compat/thrust/system/cuda/detail/reduce_intervals.inl b/compat/thrust/system/cuda/detail/reduce_intervals.inl
deleted file mode 100644
index 2381769223..0000000000
--- a/compat/thrust/system/cuda/detail/reduce_intervals.inl
+++ /dev/null
@@ -1,203 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-
-#include <thrust/iterator/iterator_traits.h>
-
-#include <thrust/detail/minmax.h>
-#include <thrust/system/detail/internal/decompose.h>
-#include <thrust/system/cuda/detail/extern_shared_ptr.h>
-#include <thrust/system/cuda/detail/block/reduce.h>
-#include <thrust/system/cuda/detail/detail/launch_closure.h>
-#include <thrust/system/cuda/detail/detail/launch_calculator.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-
-template <typename InputIterator,
-          typename OutputIterator,
-          typename BinaryFunction,
-          typename Decomposition,
-          typename Context>
-struct commutative_reduce_intervals_closure
-{
-  InputIterator  input;
-  OutputIterator output;
-  BinaryFunction binary_op;
-  Decomposition  decomposition;
-  unsigned int shared_array_size;
-
-  typedef Context context_type;
-  context_type context;
-
-  commutative_reduce_intervals_closure(InputIterator input, OutputIterator output, BinaryFunction binary_op, Decomposition decomposition, unsigned int shared_array_size, Context context = Context())
-    : input(input), output(output), binary_op(binary_op), decomposition(decomposition), shared_array_size(shared_array_size), context(context) {}
-
-  __device__ __thrust_forceinline__
-  void operator()(void)
-  {
-    typedef typename thrust::iterator_value<OutputIterator>::type OutputType;
-    extern_shared_ptr<OutputType>  shared_array;
-
-    typedef typename Decomposition::index_type index_type;
-   
-    // this block processes results in [range.begin(), range.end())
-    thrust::system::detail::internal::index_range<index_type> range = decomposition[context.block_index()];
-
-    index_type i = range.begin() + context.thread_index();
-      
-    input += i;
-
-    if (range.size() < context.block_dimension())
-    {
-      // compute reduction with the first shared_array_size threads
-      if (context.thread_index() < thrust::min<index_type>(shared_array_size,range.size()))
-      {
-        OutputType sum = *input;
-
-        i     += shared_array_size;
-        input += shared_array_size;
-
-        while (i < range.end())
-        {
-          OutputType val = *input;
-
-          sum = binary_op(sum, val);
-
-          i      += shared_array_size;
-          input  += shared_array_size;
-        }
-
-        shared_array[context.thread_index()] = sum;  
-      }
-    }
-    else
-    {
-      // compute reduction with all blockDim.x threads
-      OutputType sum = *input;
-
-      i     += context.block_dimension();
-      input += context.block_dimension();
-
-      while (i < range.end())
-      {
-        OutputType val = *input;
-
-        sum = binary_op(sum, val);
-
-        i      += context.block_dimension();
-        input  += context.block_dimension();
-      }
-
-      // write first shared_array_size values into shared memory
-      if (context.thread_index() < shared_array_size)
-        shared_array[context.thread_index()] = sum;  
-
-      // accumulate remaining values (if any) to shared memory in stages
-      if (context.block_dimension() > shared_array_size)
-      {
-        unsigned int lb = shared_array_size;
-        unsigned int ub = shared_array_size + lb;
-        
-        while (lb < context.block_dimension())
-        {
-          context.barrier();
-
-          if (lb <= context.thread_index() && context.thread_index() < ub)
-          {
-            OutputType tmp = shared_array[context.thread_index() - lb];
-            shared_array[context.thread_index() - lb] = binary_op(tmp, sum);
-          }
-
-          lb += shared_array_size;
-          ub += shared_array_size;
-        }
-      }
-    }
-  
-    context.barrier();
-
-    block::reduce_n(context, shared_array, thrust::min<index_type>(range.size(), shared_array_size), binary_op);
-  
-    if (context.thread_index() == 0)
-    {
-      output += context.block_index();
-      *output = shared_array[0];
-    }
-  }
-};
-
-__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_BEGIN
-
-template <typename ExecutionPolicy,
-          typename InputIterator,
-          typename OutputIterator,
-          typename BinaryFunction,
-          typename Decomposition>
-void reduce_intervals(execution_policy<ExecutionPolicy> &,
-                      InputIterator input,
-                      OutputIterator output,
-                      BinaryFunction binary_op,
-                      Decomposition decomp)
-{
-  // we're attempting to launch a kernel, assert we're compiling with nvcc
-  // ========================================================================
-  // X Note to the user: If you've found this line due to a compiler error, X
-  // X you need to compile your code using nvcc, rather than g++ or cl.exe  X
-  // ========================================================================
-  THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation<InputIterator, THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC>::value) );
-
-  if (decomp.size() == 0)
-    return;
-  
-  // TODO if (decomp.size() > deviceProperties.maxGridSize[0]) throw cuda exception (or handle general case)
-
-  typedef detail::blocked_thread_array Context;
-  typedef commutative_reduce_intervals_closure<InputIterator,OutputIterator,BinaryFunction,Decomposition,Context> Closure;
-  typedef typename thrust::iterator_value<OutputIterator>::type OutputType;
-  
-  detail::launch_calculator<Closure> calculator;
-
-  thrust::tuple<size_t,size_t,size_t> config = calculator.with_variable_block_size_available_smem();
-
-  //size_t max_blocks = thrust::get<0>(config);
-  size_t block_size = thrust::get<1>(config);
-  size_t max_memory = thrust::get<2>(config);
-
-  // determine shared array size
-  size_t shared_array_size  = thrust::min(max_memory / sizeof(OutputType), block_size);
-  size_t shared_array_bytes = sizeof(OutputType) * shared_array_size;
-  
-  // TODO if (shared_array_size < 1) throw cuda exception "insufficient shared memory"
-
-  Closure closure(input, output, binary_op, decomp, shared_array_size);
-  detail::launch_closure(closure, decomp.size(), block_size, shared_array_bytes);
-}
-
-__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_END
-
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cuda/detail/remove.h b/compat/thrust/system/cuda/detail/remove.h
deleted file mode 100644
index a307fc5fc8..0000000000
--- a/compat/thrust/system/cuda/detail/remove.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system has no special version of this algorithm 
-
diff --git a/compat/thrust/system/cuda/detail/replace.h b/compat/thrust/system/cuda/detail/replace.h
deleted file mode 100644
index a307fc5fc8..0000000000
--- a/compat/thrust/system/cuda/detail/replace.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system has no special version of this algorithm 
-
diff --git a/compat/thrust/system/cuda/detail/reverse.h b/compat/thrust/system/cuda/detail/reverse.h
deleted file mode 100644
index a307fc5fc8..0000000000
--- a/compat/thrust/system/cuda/detail/reverse.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system has no special version of this algorithm 
-
diff --git a/compat/thrust/system/cuda/detail/runtime_introspection.h b/compat/thrust/system/cuda/detail/runtime_introspection.h
deleted file mode 100644
index 39f6c9fadc..0000000000
--- a/compat/thrust/system/cuda/detail/runtime_introspection.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file runtime_introspection.h
- *  \brief Defines the interface to functions
- *         providing introspection into the architecture
- *         of CUDA devices.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// #include this for device_properties_t and function_attributes_t
-#include <thrust/system/cuda/detail/cuda_launch_config.h>
-
-// #include this for size_t
-#include <cstddef>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-
-
-/*! Returns the current device ordinal.
- */
-inline int current_device();
-
-/*! Returns a copy of the device_properties_t structure
- *  that is associated with a given device.
- */
-inline device_properties_t device_properties(int device_id);
-
-/*! Returns a copy of the device_properties_t structure
- *  that is associated with the current device.
- */
-inline device_properties_t device_properties(void);
-
-/*! Returns a copy of the function_attributes_t structure
- *  that is associated with a given __global__ function
- */
-template <typename KernelFunction>
-inline function_attributes_t function_attributes(KernelFunction kernel);
-
-/*! Returns the compute capability of a device in integer format.
- *  For example, returns 10 for sm_10 and 21 for sm_21
- *  \return The compute capability as an integer
- */
-inline size_t compute_capability(const device_properties_t &properties);
-inline size_t compute_capability(void);
-
-
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/cuda/detail/runtime_introspection.inl>
-
diff --git a/compat/thrust/system/cuda/detail/runtime_introspection.inl b/compat/thrust/system/cuda/detail/runtime_introspection.inl
deleted file mode 100644
index a5cc382964..0000000000
--- a/compat/thrust/system/cuda/detail/runtime_introspection.inl
+++ /dev/null
@@ -1,169 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cuda/detail/runtime_introspection.h>
-#include <thrust/system/cuda/detail/guarded_cuda_runtime_api.h>
-#include <thrust/detail/util/blocking.h>
-#include <thrust/detail/minmax.h>
-#include <thrust/system_error.h>
-#include <thrust/system/cuda/error.h>
-
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-namespace runtime_introspection_detail
-{
-
-
-inline void get_device_properties(device_properties_t &p, int device_id)
-{
-  cudaDeviceProp properties;
-  
-  cudaError_t error = cudaGetDeviceProperties(&properties, device_id);
-  
-  if(error)
-    throw thrust::system_error(error, thrust::cuda_category());
-
-  // be careful about how this is initialized!
-  device_properties_t temp = {
-    properties.major,
-    {
-      properties.maxGridSize[0],
-      properties.maxGridSize[1],
-      properties.maxGridSize[2]
-    },
-    properties.maxThreadsPerBlock,
-    properties.maxThreadsPerMultiProcessor,
-    properties.minor,
-    properties.multiProcessorCount,
-    properties.regsPerBlock,
-    properties.sharedMemPerBlock,
-    properties.warpSize
-  };
-
-  p = temp;
-} // end get_device_properties()
-
-
-} // end runtime_introspection_detail
-
-
-inline device_properties_t device_properties(int device_id)
-{
-  // cache the result of get_device_properties, because it is slow
-  // only cache the first few devices
-  static const int max_num_devices                              = 16;
-
-  static bool properties_exist[max_num_devices]                 = {0};
-  static device_properties_t device_properties[max_num_devices] = {};
-
-  if(device_id >= max_num_devices)
-  {
-    device_properties_t result;
-    runtime_introspection_detail::get_device_properties(result, device_id);
-    return result;
-  }
-
-  if(!properties_exist[device_id])
-  {
-    runtime_introspection_detail::get_device_properties(device_properties[device_id], device_id);
-
-    // disallow the compiler to move the write to properties_exist[device_id]
-    // before the initialization of device_properties[device_id]
-    __thrust_compiler_fence();
-    
-    properties_exist[device_id] = true;
-  }
-
-  return device_properties[device_id];
-}
-
-inline int current_device()
-{
-  int result = -1;
-
-  cudaError_t error = cudaGetDevice(&result);
-
-  if(error)
-    throw thrust::system_error(error, thrust::cuda_category());
-
-  if(result < 0)
-    throw thrust::system_error(cudaErrorNoDevice, thrust::cuda_category());
-
-  return result;
-}
-
-inline device_properties_t device_properties(void)
-{
-  return device_properties(current_device());
-}
-
-template <typename KernelFunction>
-inline function_attributes_t function_attributes(KernelFunction kernel)
-{
-// cudaFuncGetAttributes(), used below, only exists when __CUDACC__ is defined
-#ifdef __CUDACC__
-  typedef void (*fun_ptr_type)();
-
-  fun_ptr_type fun_ptr = reinterpret_cast<fun_ptr_type>(kernel);
-
-  cudaFuncAttributes attributes;
-  
-  cudaError_t error = cudaFuncGetAttributes(&attributes, fun_ptr);
-  
-  if(error)
-  {
-    throw thrust::system_error(error, thrust::cuda_category());
-  }
-
-  // be careful about how this is initialized!
-  function_attributes_t result = {
-    attributes.constSizeBytes,
-    attributes.localSizeBytes,
-    attributes.maxThreadsPerBlock,
-    attributes.numRegs,
-    attributes.sharedSizeBytes
-  };
-
-  return result;
-#else
-  return function_attributes_t();
-#endif // __CUDACC__
-}
-
-inline size_t compute_capability(const device_properties_t &properties)
-{
-  return 10 * properties.major + properties.minor;
-}
-
-inline size_t compute_capability(void)
-{
-  return compute_capability(device_properties());
-}
-
-
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cuda/detail/scan.h b/compat/thrust/system/cuda/detail/scan.h
deleted file mode 100644
index 036c89a217..0000000000
--- a/compat/thrust/system/cuda/detail/scan.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file scan.h
- *  \brief Scan operations (parallel prefix-sum) [cuda]
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cuda/detail/execution_policy.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename AssociativeOperator>
-  OutputIterator inclusive_scan(execution_policy<DerivedPolicy> &exec,
-                                InputIterator first,
-                                InputIterator last,
-                                OutputIterator result,
-                                AssociativeOperator binary_op);
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename T,
-         typename AssociativeOperator>
-  OutputIterator exclusive_scan(execution_policy<DerivedPolicy> &exec,
-                                InputIterator first,
-                                InputIterator last,
-                                OutputIterator result,
-                                T init,
-                                AssociativeOperator binary_op);
-
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/cuda/detail/scan.inl>
-
diff --git a/compat/thrust/system/cuda/detail/scan.inl b/compat/thrust/system/cuda/detail/scan.inl
deleted file mode 100644
index 9d9c6d20ee..0000000000
--- a/compat/thrust/system/cuda/detail/scan.inl
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file scan.inl
- *  \brief Inline file for scan.h.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/static_assert.h>
-
-#include <thrust/system/cuda/detail/detail/fast_scan.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename AssociativeOperator>
-  OutputIterator inclusive_scan(execution_policy<DerivedPolicy> &exec,
-                                InputIterator first,
-                                InputIterator last,
-                                OutputIterator result,
-                                AssociativeOperator binary_op)
-{
-    // we're attempting to launch a kernel, assert we're compiling with nvcc
-    // ========================================================================
-    // X Note to the user: If you've found this line due to a compiler error, X
-    // X you need to compile your code using nvcc, rather than g++ or cl.exe  X
-    // ========================================================================
-    THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation<InputIterator, THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC>::value) );
-    
-    return thrust::system::cuda::detail::detail::fast_scan::inclusive_scan(exec, first, last, result, binary_op);
-}
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename T,
-         typename AssociativeOperator>
-  OutputIterator exclusive_scan(execution_policy<DerivedPolicy> &exec,
-                                InputIterator first,
-                                InputIterator last,
-                                OutputIterator result,
-                                T init,
-                                AssociativeOperator binary_op)
-{
-    // we're attempting to launch a kernel, assert we're compiling with nvcc
-    // ========================================================================
-    // X Note to the user: If you've found this line due to a compiler error, X
-    // X you need to compile your code using nvcc, rather than g++ or cl.exe  X
-    // ========================================================================
-    THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation<InputIterator, THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC>::value) );
-
-    return thrust::system::cuda::detail::detail::fast_scan::exclusive_scan(exec, first, last, result, init, binary_op);
-}
-
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cuda/detail/scan_by_key.h b/compat/thrust/system/cuda/detail/scan_by_key.h
deleted file mode 100644
index a307fc5fc8..0000000000
--- a/compat/thrust/system/cuda/detail/scan_by_key.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system has no special version of this algorithm 
-
diff --git a/compat/thrust/system/cuda/detail/scatter.h b/compat/thrust/system/cuda/detail/scatter.h
deleted file mode 100644
index a307fc5fc8..0000000000
--- a/compat/thrust/system/cuda/detail/scatter.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system has no special version of this algorithm 
-
diff --git a/compat/thrust/system/cuda/detail/sequence.h b/compat/thrust/system/cuda/detail/sequence.h
deleted file mode 100644
index a307fc5fc8..0000000000
--- a/compat/thrust/system/cuda/detail/sequence.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system has no special version of this algorithm 
-
diff --git a/compat/thrust/system/cuda/detail/set_difference.inl b/compat/thrust/system/cuda/detail/set_difference.inl
deleted file mode 100644
index 33d9884730..0000000000
--- a/compat/thrust/system/cuda/detail/set_difference.inl
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/cstdint.h>
-#include <thrust/system/cuda/detail/detail/set_operation.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-namespace set_difference_detail
-{
-
-
-struct serial_bounded_set_difference
-{
-  // max_input_size <= 32
-  template<typename Size, typename InputIterator1, typename InputIterator2, typename OutputIterator, typename Compare>
-  inline __device__
-    thrust::detail::uint32_t operator()(Size max_input_size,
-                                        InputIterator1 first1, InputIterator1 last1,
-                                        InputIterator2 first2, InputIterator2 last2,
-                                        OutputIterator result,
-                                        Compare comp)
-  {
-    thrust::detail::uint32_t active_mask = 0;
-    thrust::detail::uint32_t active_bit = 1;
-  
-    while(first1 != last1 && first2 != last2)
-    {
-      if(comp(*first1,*first2))
-      {
-        *result = *first1;
-        active_mask |= active_bit;
-        ++first1;
-      } // end if
-      else if(comp(*first2,*first1))
-      {
-        ++first2;
-      } // end else if
-      else
-      {
-        ++first1;
-        ++first2;
-      } // end else
-  
-      ++result;
-      active_bit <<= 1;
-    } // end while
-
-    while(first1 != last1)
-    {
-      *result = *first1;
-      ++first1;
-      ++result;
-      active_mask |= active_bit;
-      active_bit <<= 1;
-    }
-  
-    return active_mask;
-  }
-
-
-  template<typename Size, typename InputIterator1, typename InputIterator2, typename Compare>
-  inline __device__
-    Size count(Size max_input_size,
-               InputIterator1 first1, InputIterator1 last1,
-               InputIterator2 first2, InputIterator2 last2,
-               Compare comp)
-  {
-    Size result = 0;
-  
-    while(first1 != last1 && first2 != last2)
-    {
-      if(comp(*first1,*first2))
-      {
-        ++first1;
-        ++result;
-      } // end if
-      else if(comp(*first2,*first1))
-      {
-        ++first2;
-      } // end else if
-      else
-      {
-        ++first1;
-        ++first2;
-      } // end else
-    } // end while
-  
-    return result + last1 - first1;
-  }
-}; // end serial_bounded_set_difference
-
-
-} // end namespace set_difference_detail
-
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2, 
-	 typename RandomAccessIterator3,
-         typename Compare>
-RandomAccessIterator3 set_difference(execution_policy<DerivedPolicy> &exec,
-                                     RandomAccessIterator1 first1,
-                                     RandomAccessIterator1 last1,
-                                     RandomAccessIterator2 first2,
-                                     RandomAccessIterator2 last2,
-                                     RandomAccessIterator3 result,
-                                     Compare comp)
-{
-  return thrust::system::cuda::detail::detail::set_operation(exec, first1, last1, first2, last2, result, comp, set_difference_detail::serial_bounded_set_difference());
-} // end set_difference
-
-
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cuda/detail/set_intersection.inl b/compat/thrust/system/cuda/detail/set_intersection.inl
deleted file mode 100644
index e4810b6d1b..0000000000
--- a/compat/thrust/system/cuda/detail/set_intersection.inl
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/cstdint.h>
-#include <thrust/system/cuda/detail/detail/set_operation.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-namespace set_intersection_detail
-{
-
-
-struct serial_bounded_set_intersection
-{
-  // max_input_size <= 32
-  template<typename Size, typename InputIterator1, typename InputIterator2, typename OutputIterator, typename Compare>
-  inline __device__
-    thrust::detail::uint32_t operator()(Size max_input_size,
-                                        InputIterator1 first1, InputIterator1 last1,
-                                        InputIterator2 first2, InputIterator2 last2,
-                                        OutputIterator result,
-                                        Compare comp)
-  {
-    thrust::detail::uint32_t active_mask = 0;
-    thrust::detail::uint32_t active_bit = 1;
-  
-    while(first1 != last1 && first2 != last2)
-    {
-      if(comp(*first1,*first2))
-      {
-        ++first1;
-      } // end if
-      else if(comp(*first2,*first1))
-      {
-        ++first2;
-      } // end else if
-      else
-      {
-        *result = *first1;
-        ++first1;
-        ++first2;
-        active_mask |= active_bit;
-      } // end else
-  
-      ++result;
-      active_bit <<= 1;
-    } // end while
-  
-    return active_mask;
-  }
-
-
-  template<typename Size, typename InputIterator1, typename InputIterator2, typename Compare>
-  inline __device__
-    Size count(Size max_input_size,
-               InputIterator1 first1, InputIterator1 last1,
-               InputIterator2 first2, InputIterator2 last2,
-               Compare comp)
-  {
-    Size result = 0;
-  
-    while(first1 != last1 && first2 != last2)
-    {
-      if(comp(*first1,*first2))
-      {
-        ++first1;
-      } // end if
-      else if(comp(*first2,*first1))
-      {
-        ++first2;
-      } // end else if
-      else
-      {
-        ++result;
-        ++first1;
-        ++first2;
-      } // end else
-    } // end while
-  
-    return result;
-  }
-}; // end serial_bounded_set_intersection
-
-
-} // end namespace set_intersection_detail
-
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2, 
-	 typename RandomAccessIterator3,
-         typename Compare>
-RandomAccessIterator3 set_intersection(execution_policy<DerivedPolicy> &exec,
-                                       RandomAccessIterator1 first1,
-                                       RandomAccessIterator1 last1,
-                                       RandomAccessIterator2 first2,
-                                       RandomAccessIterator2 last2,
-                                       RandomAccessIterator3 result,
-                                       Compare comp)
-{
-  return thrust::system::cuda::detail::detail::set_operation(exec, first1, last1, first2, last2, result, comp, set_intersection_detail::serial_bounded_set_intersection());
-} // end set_intersection
-
-
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cuda/detail/set_operations.h b/compat/thrust/system/cuda/detail/set_operations.h
deleted file mode 100644
index 040e3419ce..0000000000
--- a/compat/thrust/system/cuda/detail/set_operations.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cuda/detail/execution_policy.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2, 
-	 typename RandomAccessIterator3,
-         typename Compare>
-RandomAccessIterator3 set_difference(execution_policy<DerivedPolicy> &exec,
-                                     RandomAccessIterator1 first1,
-                                     RandomAccessIterator1 last1,
-                                     RandomAccessIterator2 first2,
-                                     RandomAccessIterator2 last2,
-                                     RandomAccessIterator3 result,
-                                     Compare comp);
-
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2, 
-	 typename RandomAccessIterator3,
-         typename Compare>
-RandomAccessIterator3 set_intersection(execution_policy<DerivedPolicy> &exec,
-                                       RandomAccessIterator1 first1,
-                                       RandomAccessIterator1 last1,
-                                       RandomAccessIterator2 first2,
-                                       RandomAccessIterator2 last2,
-                                       RandomAccessIterator3 result,
-                                       Compare comp);
-
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2, 
-	 typename RandomAccessIterator3,
-         typename Compare>
-RandomAccessIterator3 set_symmetric_difference(execution_policy<DerivedPolicy> &exec,
-                                               RandomAccessIterator1 first1,
-                                               RandomAccessIterator1 last1,
-                                               RandomAccessIterator2 first2,
-                                               RandomAccessIterator2 last2,
-                                               RandomAccessIterator3 result,
-                                               Compare comp);
-
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2, 
-	 typename RandomAccessIterator3,
-         typename Compare>
-RandomAccessIterator3 set_union(execution_policy<DerivedPolicy> &exec,
-                                RandomAccessIterator1 first1,
-                                RandomAccessIterator1 last1,
-                                RandomAccessIterator2 first2,
-                                RandomAccessIterator2 last2,
-                                RandomAccessIterator3 result,
-                                Compare comp);
-
-
-} // end detail
-} // end cuda
-} // end system
-} // end thrust
-
-#include <thrust/system/cuda/detail/set_difference.inl>
-#include <thrust/system/cuda/detail/set_intersection.inl>
-#include <thrust/system/cuda/detail/set_symmetric_difference.inl>
-#include <thrust/system/cuda/detail/set_union.inl>
-
diff --git a/compat/thrust/system/cuda/detail/set_symmetric_difference.inl b/compat/thrust/system/cuda/detail/set_symmetric_difference.inl
deleted file mode 100644
index 112c955bc0..0000000000
--- a/compat/thrust/system/cuda/detail/set_symmetric_difference.inl
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/cstdint.h>
-#include <thrust/system/cuda/detail/detail/set_operation.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-namespace set_symmetric_difference_detail
-{
-
-
-struct serial_bounded_set_symmetric_difference
-{
-  // max_input_size <= 32
-  template<typename Size, typename InputIterator1, typename InputIterator2, typename OutputIterator, typename Compare>
-  inline __device__
-    thrust::detail::uint32_t operator()(Size max_input_size,
-                                        InputIterator1 first1, InputIterator1 last1,
-                                        InputIterator2 first2, InputIterator2 last2,
-                                        OutputIterator result,
-                                        Compare comp)
-  {
-    thrust::detail::uint32_t active_mask = 0;
-    thrust::detail::uint32_t active_bit = 1;
-  
-    while(first1 != last1 && first2 != last2)
-    {
-      if(comp(*first1,*first2))
-      {
-        *result = *first1;
-        active_mask |= active_bit;
-        ++first1;
-      } // end if
-      else if(comp(*first2,*first1))
-      {
-        *result = *first2;
-        active_mask |= active_bit;
-        ++first2;
-      } // end else if
-      else
-      {
-        ++first1;
-        ++first2;
-      } // end else
-  
-      ++result;
-      active_bit <<= 1;
-    } // end while
-
-    while(first1 != last1)
-    {
-      *result = *first1;
-      ++first1;
-      ++result;
-      active_mask |= active_bit;
-      active_bit <<= 1;
-    }
-
-    while(first2 != last2)
-    {
-      *result = *first2;
-      ++first2;
-      ++result;
-      active_mask |= active_bit;
-      active_bit <<= 1;
-    }
-  
-    return active_mask;
-  }
-
-
-  template<typename Size, typename InputIterator1, typename InputIterator2, typename Compare>
-  inline __device__
-    Size count(Size max_input_size,
-               InputIterator1 first1, InputIterator1 last1,
-               InputIterator2 first2, InputIterator2 last2,
-               Compare comp)
-  {
-    Size result = 0;
-  
-    while(first1 != last1 && first2 != last2)
-    {
-      if(comp(*first1,*first2))
-      {
-        ++first1;
-        ++result;
-      } // end if
-      else if(comp(*first2,*first1))
-      {
-        ++first2;
-        ++result;
-      } // end else if
-      else
-      {
-        ++first1;
-        ++first2;
-      } // end else
-    } // end while
-  
-    return result + thrust::max(last1 - first1,last2 - first2);
-  }
-}; // end serial_bounded_set_symmetric_difference
-
-
-} // end namespace set_symmetric_difference_detail
-
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2, 
-	 typename RandomAccessIterator3,
-         typename Compare>
-RandomAccessIterator3 set_symmetric_difference(execution_policy<DerivedPolicy> &exec,
-                                               RandomAccessIterator1 first1,
-                                               RandomAccessIterator1 last1,
-                                               RandomAccessIterator2 first2,
-                                               RandomAccessIterator2 last2,
-                                               RandomAccessIterator3 result,
-                                               Compare comp)
-{
-  return thrust::system::cuda::detail::detail::set_operation(exec, first1, last1, first2, last2, result, comp, set_symmetric_difference_detail::serial_bounded_set_symmetric_difference());
-} // end set_symmetric_difference
-
-
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cuda/detail/set_union.inl b/compat/thrust/system/cuda/detail/set_union.inl
deleted file mode 100644
index 66cccab39b..0000000000
--- a/compat/thrust/system/cuda/detail/set_union.inl
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/cstdint.h>
-#include <thrust/system/cuda/detail/detail/set_operation.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-namespace set_union_detail
-{
-
-
-struct serial_bounded_set_union
-{
-  // max_input_size <= 32
-  template<typename Size, typename InputIterator1, typename InputIterator2, typename OutputIterator, typename Compare>
-  inline __device__
-    thrust::detail::uint32_t operator()(Size max_input_size,
-                                        InputIterator1 first1, InputIterator1 last1,
-                                        InputIterator2 first2, InputIterator2 last2,
-                                        OutputIterator result,
-                                        Compare comp)
-  {
-    thrust::detail::uint32_t active_mask = 0;
-    thrust::detail::uint32_t active_bit = 1;
-  
-    while(first1 != last1 && first2 != last2)
-    {
-      if(comp(*first1,*first2))
-      {
-        *result = *first1;
-        ++first1;
-      } // end if
-      else if(comp(*first2,*first1))
-      {
-        *result = *first2;
-        ++first2;
-      } // end else if
-      else
-      {
-        *result = *first1;
-        ++first1;
-        ++first2;
-      } // end else
-  
-      ++result;
-      active_mask |= active_bit;
-      active_bit <<= 1;
-    } // end while
-
-    while(first1 != last1)
-    {
-      *result = *first1;
-      ++first1;
-      ++result;
-      active_mask |= active_bit;
-      active_bit <<= 1;
-    }
-
-    while(first2 != last2)
-    {
-      *result = *first2;
-      ++first2;
-      ++result;
-      active_mask |= active_bit;
-      active_bit <<= 1;
-    }
-  
-    return active_mask;
-  }
-
-
-  template<typename Size, typename InputIterator1, typename InputIterator2, typename Compare>
-  inline __device__
-    Size count(Size max_input_size,
-               InputIterator1 first1, InputIterator1 last1,
-               InputIterator2 first2, InputIterator2 last2,
-               Compare comp)
-  {
-    Size result = 0;
-  
-    while(first1 != last1 && first2 != last2)
-    {
-      if(comp(*first1,*first2))
-      {
-        ++first1;
-      } // end if
-      else if(comp(*first2,*first1))
-      {
-        ++first2;
-      } // end else if
-      else
-      {
-        ++first1;
-        ++first2;
-      } // end else
-
-      ++result;
-    } // end while
-  
-    return result + thrust::max(last1 - first1,last2 - first2);
-  }
-}; // end serial_bounded_set_union
-
-
-} // end namespace set_union_detail
-
-
-template<typename ExecutionPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2, 
-	 typename RandomAccessIterator3,
-         typename Compare>
-RandomAccessIterator3 set_union(execution_policy<ExecutionPolicy> &exec,
-                                RandomAccessIterator1 first1,
-                                RandomAccessIterator1 last1,
-                                RandomAccessIterator2 first2,
-                                RandomAccessIterator2 last2,
-                                RandomAccessIterator3 result,
-                                Compare comp)
-{
-  return thrust::system::cuda::detail::detail::set_operation(exec, first1, last1, first2, last2, result, comp, set_union_detail::serial_bounded_set_union());
-} // end set_union
-
-
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cuda/detail/sort.h b/compat/thrust/system/cuda/detail/sort.h
deleted file mode 100644
index e78d36a76b..0000000000
--- a/compat/thrust/system/cuda/detail/sort.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cuda/detail/execution_policy.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator,
-         typename StrictWeakOrdering>
-  void stable_sort(execution_policy<DerivedPolicy> &exec,
-                   RandomAccessIterator first,
-                   RandomAccessIterator last,
-                   StrictWeakOrdering comp);
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename StrictWeakOrdering>
-  void stable_sort_by_key(execution_policy<DerivedPolicy> &exec,
-                          RandomAccessIterator1 keys_first,
-                          RandomAccessIterator1 keys_last,
-                          RandomAccessIterator2 values_first,
-                          StrictWeakOrdering comp);
-
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/cuda/detail/sort.inl>
-
diff --git a/compat/thrust/system/cuda/detail/sort.inl b/compat/thrust/system/cuda/detail/sort.inl
deleted file mode 100644
index d7e0a60c45..0000000000
--- a/compat/thrust/system/cuda/detail/sort.inl
+++ /dev/null
@@ -1,287 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file sort.inl
- *  \brief Inline file for sort.h
- */
-
-#include <thrust/system/cuda/detail/detail/stable_merge_sort.h>
-#include <thrust/system/cuda/detail/detail/stable_primitive_sort.h>
-
-#include <thrust/reverse.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/system/cuda/detail/execution_policy.h>
-#include <thrust/system/cuda/detail/temporary_indirect_permutation.h>
-#include <thrust/detail/trivial_sequence.h>
-
-
-/*
- *  This file implements the following dispatch procedure for cuda::stable_sort()
- *  and cuda::stable_sort_by_key(). The first level inspects the KeyType
- *  and StrictWeakOrdering to determine whether a sort assuming primitive-typed
- *  data may be applied.
- *
- *  If a sort assuming primitive-typed data can be applied (i.e., a radix sort),
- *  the input ranges are first trivialized (turned into simple contiguous ranges
- *  if they are not already). To implement descending orderings, an ascending
- *  sort will be reversed.
- *
- *  If a sort assuming primitive-typed data cannot be applied, a comparison-based
- *  sort is used. Depending on the size of the key and value types, one level of
- *  indirection may be applied to their input ranges. This transformation
- *  may be applied to either range to convert an ill-suited problem (i.e. sorting with
- *  large keys or large value) into a problem more amenable to the underlying
- *  merge sort algorithm.
- */
-
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-
-
-namespace stable_sort_detail
-{
-
-
-template<typename KeyType, typename StrictWeakCompare>
-  struct can_use_primitive_sort
-    : thrust::detail::and_<
-        thrust::detail::is_arithmetic<KeyType>,
-        thrust::detail::or_<
-          thrust::detail::is_same<StrictWeakCompare,thrust::less<KeyType> >,
-          thrust::detail::is_same<StrictWeakCompare,thrust::greater<KeyType> >
-        >
-      >
-{};
-
-
-template<typename RandomAccessIterator, typename StrictWeakCompare>
-  struct enable_if_primitive_sort
-    : thrust::detail::enable_if<
-        can_use_primitive_sort<
-          typename iterator_value<RandomAccessIterator>::type,
-          StrictWeakCompare
-        >::value
-      >
-{};
-
-
-template<typename RandomAccessIterator, typename StrictWeakCompare>
-  struct enable_if_comparison_sort
-    : thrust::detail::disable_if<
-        can_use_primitive_sort<
-          typename iterator_value<RandomAccessIterator>::type,
-          StrictWeakCompare
-        >::value
-      >
-{};
-
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator,
-         typename StrictWeakOrdering>
-  typename enable_if_primitive_sort<RandomAccessIterator,StrictWeakOrdering>::type
-    stable_sort(execution_policy<DerivedPolicy> &exec,
-                RandomAccessIterator first,
-                RandomAccessIterator last,
-                StrictWeakOrdering comp)
-{
-  // ensure sequence has trivial iterators
-  thrust::detail::trivial_sequence<RandomAccessIterator,DerivedPolicy> keys(exec, first, last);
-  
-  // CUDA path for thrust::stable_sort with primitive keys
-  // (e.g. int, float, short, etc.) and a less<T> or greater<T> comparison
-  // method is implemented with a primitive sort
-  thrust::system::cuda::detail::detail::stable_primitive_sort(exec, keys.begin(), keys.end());
-  
-  // copy results back, if necessary
-  if(!thrust::detail::is_trivial_iterator<RandomAccessIterator>::value)
-  {
-    thrust::copy(exec, keys.begin(), keys.end(), first);
-  }
-  
-  // if comp is greater<T> then reverse the keys
-  typedef typename thrust::iterator_traits<RandomAccessIterator>::value_type KeyType;
-  const static bool reverse = thrust::detail::is_same<StrictWeakOrdering, typename thrust::greater<KeyType> >::value;
-  
-  if(reverse)
-  {
-    thrust::reverse(first, last);
-  }
-}
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator,
-         typename StrictWeakOrdering>
-  typename enable_if_comparison_sort<RandomAccessIterator,StrictWeakOrdering>::type
-    stable_sort(execution_policy<DerivedPolicy> &exec,
-                RandomAccessIterator first,
-                RandomAccessIterator last,
-                StrictWeakOrdering comp)
-{
-  // decide whether to sort keys indirectly
-  typedef typename thrust::iterator_value<RandomAccessIterator>::type KeyType;
-  typedef thrust::detail::integral_constant<bool, (sizeof(KeyType) > 8)> use_key_indirection;
-  
-  conditional_temporary_indirect_ordering<use_key_indirection, DerivedPolicy, RandomAccessIterator, StrictWeakOrdering> potentially_indirect_keys(derived_cast(exec), first, last, comp);
-  
-  thrust::system::cuda::detail::detail::stable_merge_sort(exec,
-                                                          potentially_indirect_keys.begin(),
-                                                          potentially_indirect_keys.end(),
-                                                          potentially_indirect_keys.comp());
-}
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename StrictWeakOrdering>
-  typename enable_if_primitive_sort<RandomAccessIterator1,StrictWeakOrdering>::type
-    stable_sort_by_key(execution_policy<DerivedPolicy> &exec,
-                       RandomAccessIterator1 keys_first,
-                       RandomAccessIterator1 keys_last,
-                       RandomAccessIterator2 values_first,
-                       StrictWeakOrdering comp)
-{
-  // path for thrust::stable_sort_by_key with primitive keys
-  // (e.g. int, float, short, etc.) and a less<T> or greater<T> comparison
-  // method is implemented with stable_primitive_sort_by_key
-  
-  // if comp is greater<T> then reverse the keys and values
-  typedef typename thrust::iterator_traits<RandomAccessIterator1>::value_type KeyType;
-  const static bool reverse = thrust::detail::is_same<StrictWeakOrdering, typename thrust::greater<KeyType> >::value;
-  
-  // note, we also have to reverse the (unordered) input to preserve stability
-  if (reverse)
-  {
-    thrust::reverse(exec, keys_first,  keys_last);
-    thrust::reverse(exec, values_first, values_first + (keys_last - keys_first));
-  }
-  
-  // ensure sequences have trivial iterators
-  thrust::detail::trivial_sequence<RandomAccessIterator1,DerivedPolicy> keys(exec, keys_first, keys_last);
-  thrust::detail::trivial_sequence<RandomAccessIterator2,DerivedPolicy> values(exec, values_first, values_first + (keys_last - keys_first));
-  
-  thrust::system::cuda::detail::detail::stable_primitive_sort_by_key(exec, keys.begin(), keys.end(), values.begin());
-  
-  // copy results back, if necessary
-  if(!thrust::detail::is_trivial_iterator<RandomAccessIterator1>::value)
-      thrust::copy(exec, keys.begin(), keys.end(), keys_first);
-  if(!thrust::detail::is_trivial_iterator<RandomAccessIterator2>::value)
-      thrust::copy(exec, values.begin(), values.end(), values_first);
-  
-  if (reverse)
-  {
-    thrust::reverse(exec, keys_first,  keys_last);
-    thrust::reverse(exec, values_first, values_first + (keys_last - keys_first));
-  }
-}
-
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename StrictWeakOrdering>
-  typename enable_if_comparison_sort<RandomAccessIterator1,StrictWeakOrdering>::type
-    stable_sort_by_key(execution_policy<DerivedPolicy> &exec,
-                       RandomAccessIterator1 keys_first,
-                       RandomAccessIterator1 keys_last,
-                       RandomAccessIterator2 values_first,
-                       StrictWeakOrdering comp)
-{
-  // decide whether to apply indirection to either range
-  typedef typename thrust::iterator_value<RandomAccessIterator1>::type KeyType;
-  typedef typename thrust::iterator_value<RandomAccessIterator2>::type ValueType;
-  
-  typedef thrust::detail::integral_constant<bool, (sizeof(KeyType) > 8)> use_key_indirection;
-  typedef thrust::detail::integral_constant<bool, (sizeof(ValueType) > 4)> use_value_indirection;
-  
-  conditional_temporary_indirect_ordering<
-    use_key_indirection,
-    DerivedPolicy,
-    RandomAccessIterator1,
-    StrictWeakOrdering
-  > potentially_indirect_keys(derived_cast(exec), keys_first, keys_last, comp);
-  
-  conditional_temporary_indirect_permutation<
-    use_value_indirection,
-    DerivedPolicy,
-    RandomAccessIterator2
-  > potentially_indirect_values(derived_cast(exec), values_first, values_first + (keys_last - keys_first));
-  
-  thrust::system::cuda::detail::detail::stable_merge_sort_by_key(exec,
-                                                                 potentially_indirect_keys.begin(),
-                                                                 potentially_indirect_keys.end(),
-                                                                 potentially_indirect_values.begin(),
-                                                                 potentially_indirect_keys.comp());
-}
-
-
-} // end namespace stable_sort_detail
-
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator,
-         typename StrictWeakOrdering>
-  void stable_sort(execution_policy<DerivedPolicy> &exec,
-                   RandomAccessIterator first,
-                   RandomAccessIterator last,
-                   StrictWeakOrdering comp)
-{
-  // we're attempting to launch a kernel, assert we're compiling with nvcc
-  // ========================================================================
-  // X Note to the user: If you've found this line due to a compiler error, X
-  // X you need to compile your code using nvcc, rather than g++ or cl.exe  X
-  // ========================================================================
-  THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation<RandomAccessIterator, THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC>::value) );
-  
-  stable_sort_detail::stable_sort(exec, first, last, comp);
-}
-
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename StrictWeakOrdering>
-  void stable_sort_by_key(execution_policy<DerivedPolicy> &exec,
-                          RandomAccessIterator1 keys_first,
-                          RandomAccessIterator1 keys_last,
-                          RandomAccessIterator2 values_first,
-                          StrictWeakOrdering comp)
-{
-  // we're attempting to launch a kernel, assert we're compiling with nvcc
-  // ========================================================================
-  // X Note to the user: If you've found this line due to a compiler error, X
-  // X you need to compile your code using nvcc, rather than g++ or cl.exe  X
-  // ========================================================================
-  THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation<RandomAccessIterator1, THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC>::value) );
-  
-  stable_sort_detail::stable_sort_by_key(exec, keys_first, keys_last, values_first, comp);
-}
-
-
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cuda/detail/swap_ranges.h b/compat/thrust/system/cuda/detail/swap_ranges.h
deleted file mode 100644
index 9b1949e60f..0000000000
--- a/compat/thrust/system/cuda/detail/swap_ranges.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// cuda has no special swap_ranges
-
diff --git a/compat/thrust/system/cuda/detail/synchronize.h b/compat/thrust/system/cuda/detail/synchronize.h
deleted file mode 100644
index 762f4a39c4..0000000000
--- a/compat/thrust/system/cuda/detail/synchronize.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-
-inline void synchronize(const char *message = "");
-
-inline void synchronize_if_enabled(const char *message = "");
-
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/cuda/detail/synchronize.inl>
-
diff --git a/compat/thrust/system/cuda/detail/synchronize.inl b/compat/thrust/system/cuda/detail/synchronize.inl
deleted file mode 100644
index 5f70f799c4..0000000000
--- a/compat/thrust/system/cuda/detail/synchronize.inl
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cuda/detail/synchronize.h>
-#include <thrust/system/cuda/detail/guarded_cuda_runtime_api.h>
-#include <thrust/system/cuda/error.h>
-#include <thrust/system_error.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-
-void synchronize(const char *message)
-{
-  cudaError_t error = cudaThreadSynchronize();
-  if(error)
-  {
-    throw thrust::system_error(error, thrust::cuda_category(), std::string("synchronize: ") + message);
-  } // end if
-} // end synchronize()
-
-void synchronize_if_enabled(const char *message)
-{
-// XXX this could potentially be a runtime decision
-#if __THRUST_SYNCHRONOUS
-  synchronize(message);
-#else
-  // WAR "unused parameter" warning
-  (void) message;
-#endif
-} // end synchronize_if_enabled()
-
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cuda/detail/tabulate.h b/compat/thrust/system/cuda/detail/tabulate.h
deleted file mode 100644
index a307fc5fc8..0000000000
--- a/compat/thrust/system/cuda/detail/tabulate.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system has no special version of this algorithm 
-
diff --git a/compat/thrust/system/cuda/detail/temporary_buffer.h b/compat/thrust/system/cuda/detail/temporary_buffer.h
deleted file mode 100644
index 628bd75719..0000000000
--- a/compat/thrust/system/cuda/detail/temporary_buffer.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system has no special temporary buffer functions
-
diff --git a/compat/thrust/system/cuda/detail/temporary_indirect_permutation.h b/compat/thrust/system/cuda/detail/temporary_indirect_permutation.h
deleted file mode 100644
index 3d05f44155..0000000000
--- a/compat/thrust/system/cuda/detail/temporary_indirect_permutation.h
+++ /dev/null
@@ -1,217 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/temporary_array.h>
-#include <thrust/sequence.h>
-#include <thrust/gather.h>
-#include <thrust/detail/function.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-
-
-template<typename DerivedPolicy, typename RandomAccessIterator>
-  struct temporary_indirect_permutation
-{
-  private:
-    typedef unsigned int size_type;
-    typedef thrust::detail::temporary_array<size_type, DerivedPolicy> array_type;
-
-  public:
-    temporary_indirect_permutation(DerivedPolicy &exec, RandomAccessIterator first, RandomAccessIterator last)
-      : m_exec(exec),
-        m_src_first(first),
-        m_src_last(last),
-        m_permutation(0, m_exec, last - first)
-    {
-      // generate sorted index sequence
-      thrust::sequence(exec, m_permutation.begin(), m_permutation.end());
-    }
-
-    ~temporary_indirect_permutation()
-    {
-      // permute the source array using the indices
-      typedef typename thrust::iterator_value<RandomAccessIterator>::type value_type;
-      thrust::detail::temporary_array<value_type, DerivedPolicy> temp(m_exec, m_src_first, m_src_last);
-      thrust::gather(m_exec, m_permutation.begin(), m_permutation.end(), temp.begin(), m_src_first);
-    }
-
-    typedef typename array_type::iterator iterator;
-
-    iterator begin()
-    {
-      return m_permutation.begin();
-    }
-
-    iterator end()
-    {
-      return m_permutation.end();
-    }
-
-  private:
-    DerivedPolicy &m_exec;
-    RandomAccessIterator m_src_first, m_src_last;
-    thrust::detail::temporary_array<size_type, DerivedPolicy> m_permutation;
-};
-
-
-template<typename DerivedPolicy, typename RandomAccessIterator>
-  struct iterator_range_with_execution_policy
-{
-  iterator_range_with_execution_policy(DerivedPolicy &exec, RandomAccessIterator first, RandomAccessIterator last)
-    : m_exec(exec), m_first(first), m_last(last)
-  {}
-
-  typedef RandomAccessIterator iterator;
-
-  iterator begin()
-  {
-    return m_first;
-  }
-
-  iterator end()
-  {
-    return m_last;
-  }
-
-  DerivedPolicy &exec()
-  {
-    return m_exec;
-  }
-
-  DerivedPolicy &m_exec;
-  RandomAccessIterator m_first, m_last;
-};
-
-
-template<typename Condition, typename DerivedPolicy, typename RandomAccessIterator>
-  struct conditional_temporary_indirect_permutation
-    : thrust::detail::eval_if<
-        Condition::value,
-        thrust::detail::identity_<temporary_indirect_permutation<DerivedPolicy, RandomAccessIterator> >,
-        thrust::detail::identity_<iterator_range_with_execution_policy<DerivedPolicy, RandomAccessIterator> >
-      >::type
-{
-  typedef typename thrust::detail::eval_if<
-    Condition::value,
-    thrust::detail::identity_<temporary_indirect_permutation<DerivedPolicy, RandomAccessIterator> >,
-    thrust::detail::identity_<iterator_range_with_execution_policy<DerivedPolicy, RandomAccessIterator> >
-  >::type super_t;
-
-  conditional_temporary_indirect_permutation(DerivedPolicy &exec, RandomAccessIterator first, RandomAccessIterator last)
-    : super_t(exec, first, last)
-  {}
-};
-
-
-template<typename DerivedPolicy, typename RandomAccessIterator, typename Compare>
-  struct temporary_indirect_ordering
-    : temporary_indirect_permutation<DerivedPolicy,RandomAccessIterator>
-{
-  private:
-    typedef temporary_indirect_permutation<DerivedPolicy,RandomAccessIterator> super_t;
-
-  public:
-    temporary_indirect_ordering(DerivedPolicy &exec, RandomAccessIterator first, RandomAccessIterator last, Compare comp)
-      : super_t(exec, first, last),
-        m_comp(first, comp)
-    {}
-
-    struct compare
-    {
-      RandomAccessIterator first;
-
-      thrust::detail::host_device_function<
-        Compare,
-        bool
-      > comp;
-
-      compare(RandomAccessIterator first, Compare comp)
-        : first(first), comp(comp)
-      {}
-
-      template<typename Integral>
-      __host__ __device__
-      bool operator()(Integral a, Integral b)
-      {
-        return comp(first[a], first[b]);
-      }
-    };
-
-    compare comp() const
-    {
-      return m_comp;
-    }
-
-  private:
-    compare m_comp;
-};
-
-
-template<typename DerivedPolicy, typename RandomAccessIterator, typename Compare>
-  struct iterator_range_with_execution_policy_and_compare
-    : iterator_range_with_execution_policy<DerivedPolicy, RandomAccessIterator>
-{
-  typedef iterator_range_with_execution_policy<DerivedPolicy, RandomAccessIterator> super_t;
-
-  iterator_range_with_execution_policy_and_compare(DerivedPolicy &exec, RandomAccessIterator first, RandomAccessIterator last, Compare comp)
-    : super_t(exec, first, last), m_comp(comp)
-  {}
-
-  typedef Compare compare;
-
-  compare comp()
-  {
-    return m_comp;
-  }
-
-  Compare m_comp;
-};
-
-
-template<typename Condition, typename DerivedPolicy, typename RandomAccessIterator, typename Compare>
-  struct conditional_temporary_indirect_ordering
-    : thrust::detail::eval_if<
-        Condition::value,
-        thrust::detail::identity_<temporary_indirect_ordering<DerivedPolicy, RandomAccessIterator, Compare> >,
-        thrust::detail::identity_<iterator_range_with_execution_policy_and_compare<DerivedPolicy, RandomAccessIterator, Compare> >
-      >::type
-{
-  typedef typename thrust::detail::eval_if<
-    Condition::value,
-    thrust::detail::identity_<temporary_indirect_ordering<DerivedPolicy, RandomAccessIterator, Compare> >,
-    thrust::detail::identity_<iterator_range_with_execution_policy_and_compare<DerivedPolicy, RandomAccessIterator, Compare> >
-  >::type super_t;
-
-  conditional_temporary_indirect_ordering(DerivedPolicy &exec, RandomAccessIterator first, RandomAccessIterator last, Compare comp)
-    : super_t(exec, first, last, comp)
-  {}
-};
-
-
-} // end detail
-} // end cuda
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/cuda/detail/transform.h b/compat/thrust/system/cuda/detail/transform.h
deleted file mode 100644
index 0af87056e7..0000000000
--- a/compat/thrust/system/cuda/detail/transform.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// cuda has no special transform
-
diff --git a/compat/thrust/system/cuda/detail/transform_reduce.h b/compat/thrust/system/cuda/detail/transform_reduce.h
deleted file mode 100644
index a307fc5fc8..0000000000
--- a/compat/thrust/system/cuda/detail/transform_reduce.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system has no special version of this algorithm 
-
diff --git a/compat/thrust/system/cuda/detail/transform_scan.h b/compat/thrust/system/cuda/detail/transform_scan.h
deleted file mode 100644
index a307fc5fc8..0000000000
--- a/compat/thrust/system/cuda/detail/transform_scan.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system has no special version of this algorithm 
-
diff --git a/compat/thrust/system/cuda/detail/trivial_copy.h b/compat/thrust/system/cuda/detail/trivial_copy.h
deleted file mode 100644
index e0e898aad4..0000000000
--- a/compat/thrust/system/cuda/detail/trivial_copy.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cuda/detail/execution_policy.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator1,
-         typename Size,
-         typename RandomAccessIterator2>
-  void trivial_copy_n(execution_policy<DerivedPolicy> &exec,
-                      RandomAccessIterator1 first,
-                      Size n,
-                      RandomAccessIterator2 result);
-
-template<typename System1,
-         typename System2,
-         typename RandomAccessIterator1,
-         typename Size,
-         typename RandomAccessIterator2>
-  void trivial_copy_n(cross_system<System1,System2> &exec,
-                      RandomAccessIterator1 first,
-                      Size n,
-                      RandomAccessIterator2 result);
-
-} // end detail
-} // end cuda
-} // end system
-} // end thrust
-
-#include <thrust/system/cuda/detail/trivial_copy.inl>
-
diff --git a/compat/thrust/system/cuda/detail/trivial_copy.inl b/compat/thrust/system/cuda/detail/trivial_copy.inl
deleted file mode 100644
index d23a4ef8c2..0000000000
--- a/compat/thrust/system/cuda/detail/trivial_copy.inl
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cuda/detail/trivial_copy.h>
-#include <thrust/system/cuda/detail/guarded_cuda_runtime_api.h>
-#include <thrust/system_error.h>
-#include <thrust/system/cuda/error.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/system/cpp/detail/execution_policy.h>
-#include <thrust/detail/raw_pointer_cast.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-namespace detail
-{
-
-namespace trivial_copy_detail
-{
-
-inline void checked_cudaMemcpy(void *dst, const void *src, size_t count, enum cudaMemcpyKind kind)
-{
-  cudaError_t error = cudaMemcpy(dst,src,count,kind);
-  if(error)
-  {
-    throw thrust::system_error(error, thrust::cuda_category());
-  } // end error
-} // end checked_cudaMemcpy()
-
-
-template<typename System1,
-         typename System2>
-  cudaMemcpyKind cuda_memcpy_kind(const thrust::cuda::execution_policy<System1> &,
-                                  const thrust::cpp::execution_policy<System2> &)
-{
-  return cudaMemcpyDeviceToHost;
-} // end cuda_memcpy_kind()
-
-
-template<typename System1,
-         typename System2>
-  cudaMemcpyKind cuda_memcpy_kind(const thrust::cpp::execution_policy<System1> &,
-                                  const thrust::cuda::execution_policy<System2> &)
-{
-  return cudaMemcpyHostToDevice;
-} // end cuda_memcpy_kind()
-
-
-} // end namespace trivial_copy_detail
-
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator1,
-         typename Size,
-         typename RandomAccessIterator2>
-  void trivial_copy_n(execution_policy<DerivedPolicy> &exec,
-                      RandomAccessIterator1 first,
-                      Size n,
-                      RandomAccessIterator2 result)
-{
-  typedef typename thrust::iterator_value<RandomAccessIterator1>::type T;
-
-  void *dst = thrust::raw_pointer_cast(&*result);
-  const void *src = thrust::raw_pointer_cast(&*first);
-
-  trivial_copy_detail::checked_cudaMemcpy(dst, src, n * sizeof(T), cudaMemcpyDeviceToDevice);
-}
-
-
-template<typename System1,
-         typename System2,
-         typename RandomAccessIterator1,
-         typename Size,
-         typename RandomAccessIterator2>
-  void trivial_copy_n(cross_system<System1,System2> &systems,
-                      RandomAccessIterator1 first,
-                      Size n,
-                      RandomAccessIterator2 result)
-{
-  typedef typename thrust::iterator_value<RandomAccessIterator1>::type T;
-
-  void *dst = thrust::raw_pointer_cast(&*result);
-  const void *src = thrust::raw_pointer_cast(&*first);
-
-  cudaMemcpyKind kind = trivial_copy_detail::cuda_memcpy_kind(thrust::detail::derived_cast(systems.system1), thrust::detail::derived_cast(systems.system2));
-
-  trivial_copy_detail::checked_cudaMemcpy(dst, src, n * sizeof(T), kind);
-}
-
-
-} // end namespace detail
-} // end namespace cuda
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/cuda/detail/uninitialized_copy.h b/compat/thrust/system/cuda/detail/uninitialized_copy.h
deleted file mode 100644
index a307fc5fc8..0000000000
--- a/compat/thrust/system/cuda/detail/uninitialized_copy.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system has no special version of this algorithm 
-
diff --git a/compat/thrust/system/cuda/detail/uninitialized_fill.h b/compat/thrust/system/cuda/detail/uninitialized_fill.h
deleted file mode 100644
index a307fc5fc8..0000000000
--- a/compat/thrust/system/cuda/detail/uninitialized_fill.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system has no special version of this algorithm 
-
diff --git a/compat/thrust/system/cuda/detail/unique.h b/compat/thrust/system/cuda/detail/unique.h
deleted file mode 100644
index a307fc5fc8..0000000000
--- a/compat/thrust/system/cuda/detail/unique.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system has no special version of this algorithm 
-
diff --git a/compat/thrust/system/cuda/detail/unique_by_key.h b/compat/thrust/system/cuda/detail/unique_by_key.h
deleted file mode 100644
index a307fc5fc8..0000000000
--- a/compat/thrust/system/cuda/detail/unique_by_key.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system has no special version of this algorithm 
-
diff --git a/compat/thrust/system/cuda/detail/vector.inl b/compat/thrust/system/cuda/detail/vector.inl
deleted file mode 100644
index 36598764b2..0000000000
--- a/compat/thrust/system/cuda/detail/vector.inl
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in ccudaliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cuda/vector.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-
-template<typename T, typename Allocator>
-  vector<T,Allocator>
-    ::vector()
-      : super_t()
-{}
-
-template<typename T, typename Allocator>
-  vector<T,Allocator>
-    ::vector(size_type n)
-      : super_t(n)
-{}
-
-template<typename T, typename Allocator>
-  vector<T,Allocator>
-    ::vector(size_type n, const value_type &value)
-      : super_t(n,value)
-{}
-
-template<typename T, typename Allocator>
-  vector<T,Allocator>
-    ::vector(const vector &x)
-      : super_t(x)
-{}
-
-template<typename T, typename Allocator>
-  template<typename OtherT, typename OtherAllocator>
-    vector<T,Allocator>
-      ::vector(const thrust::detail::vector_base<OtherT,OtherAllocator> &x)
-        : super_t(x)
-{}
-
-template<typename T, typename Allocator>
-  template<typename OtherT, typename OtherAllocator>
-    vector<T,Allocator>
-      ::vector(const std::vector<OtherT,OtherAllocator> &x)
-        : super_t(x)
-{}
-
-template<typename T, typename Allocator>
-  template<typename InputIterator>
-    vector<T,Allocator>
-      ::vector(InputIterator first, InputIterator last)
-        : super_t(first,last)
-{}
-
-template<typename T, typename Allocator>
-  template<typename OtherT, typename OtherAllocator>
-    vector<T,Allocator> &
-      vector<T,Allocator>
-        ::operator=(const std::vector<OtherT,OtherAllocator> &x)
-{
-  super_t::operator=(x);
-  return *this;
-}
-
-template<typename T, typename Allocator>
-  template<typename OtherT, typename OtherAllocator>
-    vector<T,Allocator> &
-      vector<T,Allocator>
-        ::operator=(const thrust::detail::vector_base<OtherT,OtherAllocator> &x)
-{
-  super_t::operator=(x);
-  return *this;
-}
-      
-} // end cuda
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/cuda/error.h b/compat/thrust/system/cuda/error.h
deleted file mode 100644
index 8d098538db..0000000000
--- a/compat/thrust/system/cuda/error.h
+++ /dev/null
@@ -1,186 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file thrust/system/cuda/error.h
- *  \brief CUDA-specific error reporting
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/system/error_code.h>
-#include <driver_types.h>
-
-namespace thrust
-{
-
-namespace system
-{
-
-namespace cuda
-{
-
-/*! \addtogroup system
- *  \{
- */
-
-// To construct an error_code after a CUDA Runtime error:
-//
-//   error_code(::cudaGetLastError(), cuda_category())
-
-// XXX N3000 prefers enum class errc { ... }
-namespace errc
-{
-
-/*! \p errc_t enumerates the kinds of CUDA Runtime errors.
- */
-enum errc_t
-{
-  // from cuda/include/driver_types.h
-  // mirror their order
-  success                            = cudaSuccess,
-  missing_configuration              = cudaErrorMissingConfiguration,
-  memory_allocation                  = cudaErrorMemoryAllocation,
-  initialization_error               = cudaErrorInitializationError,
-  launch_failure                     = cudaErrorLaunchFailure,
-  prior_launch_failure               = cudaErrorPriorLaunchFailure,
-  launch_timeout                     = cudaErrorLaunchTimeout,
-  launch_out_of_resources            = cudaErrorLaunchOutOfResources,
-  invalid_device_function            = cudaErrorInvalidDeviceFunction,
-  invalid_configuration              = cudaErrorInvalidConfiguration,
-  invalid_device                     = cudaErrorInvalidDevice,
-  invalid_value                      = cudaErrorInvalidValue,
-  invalid_pitch_value                = cudaErrorInvalidPitchValue,
-  invalid_symbol                     = cudaErrorInvalidSymbol,
-  map_buffer_object_failed           = cudaErrorMapBufferObjectFailed,
-  unmap_buffer_object_failed         = cudaErrorUnmapBufferObjectFailed,
-  invalid_host_pointer               = cudaErrorInvalidHostPointer,
-  invalid_device_pointer             = cudaErrorInvalidDevicePointer,
-  invalid_texture                    = cudaErrorInvalidTexture,
-  invalid_texture_binding            = cudaErrorInvalidTextureBinding,
-  invalid_channel_descriptor         = cudaErrorInvalidChannelDescriptor,
-  invalid_memcpy_direction           = cudaErrorInvalidMemcpyDirection,
-  address_of_constant_error          = cudaErrorAddressOfConstant,
-  texture_fetch_failed               = cudaErrorTextureFetchFailed,
-  texture_not_bound                  = cudaErrorTextureNotBound,
-  synchronization_error              = cudaErrorSynchronizationError,
-  invalid_filter_setting             = cudaErrorInvalidFilterSetting,
-  invalid_norm_setting               = cudaErrorInvalidNormSetting,
-  mixed_device_execution             = cudaErrorMixedDeviceExecution,
-  cuda_runtime_unloading             = cudaErrorCudartUnloading,
-  unknown                            = cudaErrorUnknown,
-  not_yet_implemented                = cudaErrorNotYetImplemented,
-  memory_value_too_large             = cudaErrorMemoryValueTooLarge,
-  invalid_resource_handle            = cudaErrorInvalidResourceHandle,
-  not_ready                          = cudaErrorNotReady,
-  insufficient_driver                = cudaErrorInsufficientDriver,
-  set_on_active_process_error        = cudaErrorSetOnActiveProcess,
-  no_device                          = cudaErrorNoDevice,
-  ecc_uncorrectable                  = cudaErrorECCUncorrectable,
-
-#if CUDART_VERSION >= 4020
-  shared_object_symbol_not_found     = cudaErrorSharedObjectSymbolNotFound,
-  shared_object_init_failed          = cudaErrorSharedObjectInitFailed,
-  unsupported_limit                  = cudaErrorUnsupportedLimit,
-  duplicate_variable_name            = cudaErrorDuplicateVariableName,
-  duplicate_texture_name             = cudaErrorDuplicateTextureName,
-  duplicate_surface_name             = cudaErrorDuplicateSurfaceName,
-  devices_unavailable                = cudaErrorDevicesUnavailable,
-  invalid_kernel_image               = cudaErrorInvalidKernelImage,
-  no_kernel_image_for_device         = cudaErrorNoKernelImageForDevice,
-  incompatible_driver_context        = cudaErrorIncompatibleDriverContext,
-  peer_access_already_enabled        = cudaErrorPeerAccessAlreadyEnabled,
-  peer_access_not_enabled            = cudaErrorPeerAccessNotEnabled,
-  device_already_in_use              = cudaErrorDeviceAlreadyInUse,
-  profiler_disabled                  = cudaErrorProfilerDisabled,
-  assert_triggered                   = cudaErrorAssert,
-  too_many_peers                     = cudaErrorTooManyPeers,
-  host_memory_already_registered     = cudaErrorHostMemoryAlreadyRegistered,
-  host_memory_not_registered         = cudaErrorHostMemoryNotRegistered,
-  operating_system_error             = cudaErrorOperatingSystem,
-#endif
-
-#if CUDART_VERSION >= 5000
-  peer_access_unsupported            = cudaErrorPeerAccessUnsupported,
-  launch_max_depth_exceeded          = cudaErrorLaunchMaxDepthExceeded,
-  launch_file_scoped_texture_used    = cudaErrorLaunchFileScopedTex,
-  launch_file_scoped_surface_used    = cudaErrorLaunchFileScopedSurf,
-  sync_depth_exceeded                = cudaErrorSyncDepthExceeded,
-  attempted_operation_not_permitted  = cudaErrorNotPermitted,
-  attempted_operation_not_supported  = cudaErrorNotSupported,
-#endif
-
-  startup_failure                    = cudaErrorStartupFailure
-}; // end errc_t
-
-
-} // end namespace errc
-
-} // end namespace cuda
-
-/*! \return A reference to an object of a type derived from class \p thrust::error_category.
- *  \note The object's \p equivalent virtual functions shall behave as specified
- *        for the class \p thrust::error_category. The object's \p name virtual function shall
- *        return a pointer to the string <tt>"cuda"</tt>. The object's
- *        \p default_error_condition virtual function shall behave as follows:
- *
- *        If the argument <tt>ev</tt> corresponds to a CUDA error value, the function
- *        shall return <tt>error_condition(ev,cuda_category())</tt>.
- *        Otherwise, the function shall return <tt>system_category.default_error_condition(ev)</tt>.
- */
-inline const error_category &cuda_category(void);
-
-
-// XXX N3000 prefers is_error_code_enum<cuda::errc>
-
-/*! Specialization of \p is_error_code_enum for \p cuda::errc::errc_t
- */
-template<> struct is_error_code_enum<cuda::errc::errc_t> : thrust::detail::true_type {};
-
-
-// XXX replace cuda::errc::errc_t with cuda::errc upon c++0x
-/*! \return <tt>error_code(static_cast<int>(e), cuda::error_category())</tt>
- */
-inline error_code make_error_code(cuda::errc::errc_t e);
-
-
-// XXX replace cuda::errc::errc_t with cuda::errc upon c++0x
-/*! \return <tt>error_condition(static_cast<int>(e), cuda::error_category())</tt>.
- */
-inline error_condition make_error_condition(cuda::errc::errc_t e);
-
-/*! \} // end system
- */
-
-
-} // end system
-
-namespace cuda
-{
-
-// XXX replace with using system::cuda_errc upon c++0x
-namespace errc = system::cuda::errc;
-
-} // end cuda
-
-using system::cuda_category;
-
-} // end namespace thrust
-
-#include <thrust/system/cuda/detail/error.inl>
-
diff --git a/compat/thrust/system/cuda/execution_policy.h b/compat/thrust/system/cuda/execution_policy.h
deleted file mode 100644
index bbd33defd4..0000000000
--- a/compat/thrust/system/cuda/execution_policy.h
+++ /dev/null
@@ -1,165 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-/*! \file thrust/system/cuda/execution_policy.h
- *  \brief Execution policies for Thrust's CUDA system.
- */
-
-#include <thrust/detail/config.h>
-
-// get the execution policies definitions first
-#include <thrust/system/cuda/detail/execution_policy.h>
-
-// get the definition of par
-#include <thrust/system/cuda/detail/par.h>
-
-// now get all the algorithm defintitions
-
-// the order of the following #includes seems to matter, unfortunately
-
-// primitives come first, in order of increasing sophistication
-#include <thrust/system/cuda/detail/get_value.h>
-#include <thrust/system/cuda/detail/assign_value.h>
-#include <thrust/system/cuda/detail/iter_swap.h>
-
-#include <thrust/system/cuda/detail/for_each.h>
-#include <thrust/system/cuda/detail/copy.h>
-#include <thrust/system/cuda/detail/reduce.h>
-#include <thrust/system/cuda/detail/scan.h>
-#include <thrust/system/cuda/detail/sort.h>
-
-// these are alphabetical
-#include <thrust/system/cuda/detail/adjacent_difference.h>
-#include <thrust/system/cuda/detail/assign_value.h>
-#include <thrust/system/cuda/detail/binary_search.h>
-#include <thrust/system/cuda/detail/copy_if.h>
-#include <thrust/system/cuda/detail/count.h>
-#include <thrust/system/cuda/detail/equal.h>
-#include <thrust/system/cuda/detail/extrema.h>
-#include <thrust/system/cuda/detail/fill.h>
-#include <thrust/system/cuda/detail/find.h>
-#include <thrust/system/cuda/detail/gather.h>
-#include <thrust/system/cuda/detail/generate.h>
-#include <thrust/system/cuda/detail/inner_product.h>
-#include <thrust/system/cuda/detail/iter_swap.h>
-#include <thrust/system/cuda/detail/logical.h>
-#include <thrust/system/cuda/detail/malloc_and_free.h>
-#include <thrust/system/cuda/detail/merge.h>
-#include <thrust/system/cuda/detail/mismatch.h>
-#include <thrust/system/cuda/detail/partition.h>
-#include <thrust/system/cuda/detail/reduce_by_key.h>
-#include <thrust/system/cuda/detail/remove.h>
-#include <thrust/system/cuda/detail/replace.h>
-#include <thrust/system/cuda/detail/reverse.h>
-#include <thrust/system/cuda/detail/scan_by_key.h>
-#include <thrust/system/cuda/detail/scatter.h>
-#include <thrust/system/cuda/detail/sequence.h>
-#include <thrust/system/cuda/detail/set_operations.h>
-#include <thrust/system/cuda/detail/sort.h>
-#include <thrust/system/cuda/detail/swap_ranges.h>
-#include <thrust/system/cuda/detail/tabulate.h>
-#include <thrust/system/cuda/detail/transform.h>
-#include <thrust/system/cuda/detail/transform_reduce.h>
-#include <thrust/system/cuda/detail/transform_scan.h>
-#include <thrust/system/cuda/detail/uninitialized_copy.h>
-#include <thrust/system/cuda/detail/uninitialized_fill.h>
-#include <thrust/system/cuda/detail/unique.h>
-#include <thrust/system/cuda/detail/unique_by_key.h>
-
-
-// define these entities here for the purpose of Doxygenating them
-// they are actually defined elsewhere
-#if 0
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-
-
-/*! \addtogroup execution_policies
- *  \{
- */
-
-
-/*! \p thrust::cuda::execution_policy is the base class for all Thrust parallel execution
- *  policies which are derived from Thrust's CUDA backend system.
- */
-template<typename DerivedPolicy>
-struct execution_policy : thrust::execution_policy<DerivedPolicy>
-{};
-
-
-/*! \p cuda::tag is a type representing Thrust's CUDA backend system in C++'s type system.
- *  Iterators "tagged" with a type which is convertible to \p cuda::tag assert that they may be
- *  "dispatched" to algorithm implementations in the \p cuda system.
- */
-struct tag : thrust::system::cuda::execution_policy<tag> { unspecified };
-
-
-/*! \p thrust::cuda::par is the parallel execution policy associated with Thrust's CUDA
- *  backend system.
- *
- *  Instead of relying on implicit algorithm dispatch through iterator system tags, users may
- *  directly target Thrust's CUDA backend system by providing \p thrust::cuda::par as an algorithm
- *  parameter.
- *
- *  Explicit dispatch can be useful in avoiding the introduction of data copies into containers such
- *  as \p thrust::cuda::vector.
- *
- *  The type of \p thrust::cuda::par is implementation-defined.
- *
- *  The following code snippet demonstrates how to use \p thrust::cuda::par to explicitly dispatch an
- *  invocation of \p thrust::for_each to the CUDA backend system:
- *
- *  \code
- *  #include <thrust/for_each.h>
- *  #include <thrust/system/cuda/execution_policy.h>
- *  #include <cstdio>
- *
- *  struct printf_functor
- *  {
- *    __host__ __device__
- *    void operator()(int x)
- *    {
- *      printf("%d\n");
- *    }
- *  };
- *  ...
- *  int vec[3];
- *  vec[0] = 0; vec[1] = 1; vec[2] = 2;
- *
- *  thrust::for_each(thrust::cuda::par, vec.begin(), vec.end(), printf_functor());
- *
- *  // 0 1 2 is printed to standard output in some unspecified order
- *  \endcode
- */
-static const unspecified par;
-
-
-/*! \}
- */
-
-
-} // end cuda
-} // end system
-} // end thrust
-#endif
-
-
diff --git a/compat/thrust/system/cuda/experimental/pinned_allocator.h b/compat/thrust/system/cuda/experimental/pinned_allocator.h
deleted file mode 100644
index 5294659e62..0000000000
--- a/compat/thrust/system/cuda/experimental/pinned_allocator.h
+++ /dev/null
@@ -1,239 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-/*! \file thrust/system/cuda/experimental/pinned_allocator.h
- *  \brief An allocator which creates new elements in "pinned" memory with \p cudaMallocHost
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cuda/detail/guarded_cuda_runtime_api.h>
-#include <stdexcept>
-#include <limits>
-#include <string>
-#include <thrust/system/system_error.h>
-#include <thrust/system/cuda/error.h>
-
-namespace thrust
-{
-
-namespace system
-{
-
-namespace cuda
-{
-
-namespace experimental
-{
-
-/*! \addtogroup memory_management Memory Management
- *  \addtogroup memory_management_classes
- *  \ingroup memory_management
- *  \{
- */
-
-/*! \p pinned_allocator is a CUDA-specific host memory allocator
- *  that employs \c cudaMallocHost for allocation.
- *
- *  \see http://www.sgi.com/tech/stl/Allocators.html
- */
-template<typename T> class pinned_allocator;
-
-template<>
-  class pinned_allocator<void>
-{
-  public:
-    typedef void           value_type;
-    typedef void       *   pointer;
-    typedef const void *   const_pointer;
-    typedef std::size_t    size_type;
-    typedef std::ptrdiff_t difference_type;
-
-    // convert a pinned_allocator<void> to pinned_allocator<U>
-    template<typename U>
-      struct rebind
-    {
-      typedef pinned_allocator<U> other;
-    }; // end rebind
-}; // end pinned_allocator
-
-
-template<typename T>
-  class pinned_allocator
-{
-  public:
-    typedef T              value_type;
-    typedef T*             pointer;
-    typedef const T*       const_pointer;
-    typedef T&             reference;
-    typedef const T&       const_reference;
-    typedef std::size_t    size_type;
-    typedef std::ptrdiff_t difference_type;
-
-    // convert a pinned_allocator<T> to pinned_allocator<U>
-    template<typename U>
-      struct rebind
-    {
-      typedef pinned_allocator<U> other;
-    }; // end rebind
-
-    /*! \p pinned_allocator's null constructor does nothing.
-     */
-    __host__ __device__
-    inline pinned_allocator() {}
-
-    /*! \p pinned_allocator's null destructor does nothing.
-     */
-    __host__ __device__
-    inline ~pinned_allocator() {}
-
-    /*! \p pinned_allocator's copy constructor does nothing.
-     */
-    __host__ __device__
-    inline pinned_allocator(pinned_allocator const &) {}
-
-    /*! This version of \p pinned_allocator's copy constructor
-     *  is templated on the \c value_type of the \p pinned_allocator
-     *  to copy from.  It is provided merely for convenience; it
-     *  does nothing.
-     */
-    template<typename U>
-    __host__ __device__
-    inline pinned_allocator(pinned_allocator<U> const &) {}
-
-    /*! This method returns the address of a \c reference of
-     *  interest.
-     *
-     *  \p r The \c reference of interest.
-     *  \return \c r's address.
-     */
-    __host__ __device__
-    inline pointer address(reference r) { return &r; }
-
-    /*! This method returns the address of a \c const_reference
-     *  of interest.
-     *
-     *  \p r The \c const_reference of interest.
-     *  \return \c r's address.
-     */
-    __host__ __device__
-    inline const_pointer address(const_reference r) { return &r; }
-
-    /*! This method allocates storage for objects in pinned host
-     *  memory.
-     *
-     *  \p cnt The number of objects to allocate.
-     *  \return a \c pointer to the newly allocated objects.
-     *  \note This method does not invoke \p value_type's constructor.
-     *        It is the responsibility of the caller to initialize the
-     *        objects at the returned \c pointer. 
-     */
-    __host__
-    inline pointer allocate(size_type cnt,
-                            const_pointer = 0)
-    {
-      if(cnt > this->max_size())
-      {
-        throw std::bad_alloc();
-      } // end if
-
-      pointer result(0);
-      cudaError_t error = cudaMallocHost(reinterpret_cast<void**>(&result), cnt * sizeof(value_type));
-
-      if(error)
-      {
-        throw std::bad_alloc();
-      } // end if
-
-      return result;
-    } // end allocate()
-
-    /*! This method deallocates pinned host memory previously allocated
-     *  with this \c pinned_allocator.
-     *
-     *  \p p A \c pointer to the previously allocated memory.
-     *  \p cnt The number of objects previously allocated at
-     *         \p p.
-     *  \note This method does not invoke \p value_type's destructor.
-     *        It is the responsibility of the caller to destroy
-     *        the objects stored at \p p.
-     */
-    __host__
-    inline void deallocate(pointer p, size_type cnt)
-    {
-      cudaError_t error = cudaFreeHost(p);
-      
-      if(error)
-      {
-        throw thrust::system_error(error, thrust::cuda_category());
-      } // end if
-    } // end deallocate()
-
-    /*! This method returns the maximum size of the \c cnt parameter
-     *  accepted by the \p allocate() method.
-     *
-     *  \return The maximum number of objects that may be allocated
-     *          by a single call to \p allocate().
-     */
-    inline size_type max_size() const
-    {
-      return (std::numeric_limits<size_type>::max)() / sizeof(T);
-    } // end max_size()
-
-    /*! This method tests this \p pinned_allocator for equality to
-     *  another.
-     *
-     *  \param x The other \p pinned_allocator of interest.
-     *  \return This method always returns \c true.
-     */
-    __host__ __device__
-    inline bool operator==(pinned_allocator const& x) { return true; }
-
-    /*! This method tests this \p pinned_allocator for inequality
-     *  to another.
-     *
-     *  \param x The other \p pinned_allocator of interest.
-     *  \return This method always returns \c false.
-     */
-    __host__ __device__
-    inline bool operator!=(pinned_allocator const &x) { return !operator==(x); }
-}; // end pinned_allocator
-
-/*! \}
- */
-
-} // end experimental
-
-} // end cuda
-
-} // end system
-
-// alias cuda's members at top-level
-namespace cuda
-{
-
-namespace experimental
-{
-
-using thrust::system::cuda::experimental::pinned_allocator;
-
-} // end experimental
-
-} // end cuda
-
-} // end thrust
-
diff --git a/compat/thrust/system/cuda/memory.h b/compat/thrust/system/cuda/memory.h
deleted file mode 100644
index 368eea265a..0000000000
--- a/compat/thrust/system/cuda/memory.h
+++ /dev/null
@@ -1,421 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in ccudaliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-/*! \file thrust/system/cuda/memory.h
- *  \brief Managing memory associated with Thrust's CUDA system.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cuda/detail/execution_policy.h>
-#include <thrust/memory.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/detail/allocator/malloc_allocator.h>
-#include <ostream>
-
-namespace thrust
-{
-namespace system
-{
-namespace cuda
-{
-
-template<typename> class pointer;
-
-} // end cuda
-} // end system
-} // end thrust
-
-
-/*! \cond
- */
-
-// specialize std::iterator_traits to avoid problems with the name of
-// pointer's constructor shadowing its nested pointer type
-// do this before pointer is defined so the specialization is correctly
-// used inside the definition
-namespace std
-{
-
-template<typename Element>
-  struct iterator_traits<thrust::system::cuda::pointer<Element> >
-{
-  private:
-    typedef thrust::system::cuda::pointer<Element> ptr;
-
-  public:
-    typedef typename ptr::iterator_category       iterator_category;
-    typedef typename ptr::value_type              value_type;
-    typedef typename ptr::difference_type         difference_type;
-    typedef ptr                                   pointer;
-    typedef typename ptr::reference               reference;
-}; // end iterator_traits
-
-} // end std
-
-/*! \endcond
- */
-
-
-namespace thrust
-{
-namespace system
-{
-
-/*! \addtogroup system_backends Systems
- *  \ingroup system
- *  \{
- */
-
-/*! \namespace thrust::system::cuda
- *  \brief \p thrust::system::cuda is the namespace containing functionality for allocating, manipulating,
- *         and deallocating memory available to Thrust's CUDA backend system.
- *         The identifiers are provided in a separate namespace underneath <tt>thrust::system</tt>
- *         for import convenience but are also aliased in the top-level <tt>thrust::tbb</tt>
- *         namespace for easy access.
- *
- */
-namespace cuda
-{
-
-// forward declaration of reference for pointer
-template<typename Element> class reference;
-
-/*! \cond
- */
-
-// XXX nvcc + msvc have trouble instantiating reference below
-//     this is a workaround
-namespace detail
-{
-
-template<typename Element>
-  struct reference_msvc_workaround
-{
-  typedef thrust::system::cuda::reference<Element> type;
-}; // end reference_msvc_workaround
-
-} // end detail
-
-/*! \endcond
- */
-
-#if 0
-/*! \p cuda::tag is type representing Thrust's CUDA backend system in C++'s type system.
- *  Iterators "tagged" with a type which is convertible to \p cuda::tag assert that they may be
- *  "dispatched" to algorithm implementations in the \p cuda system.
- */
-struct tag { unspecified };
-#endif
-
-/*! \p pointer stores a pointer to an object allocated in memory available to the cuda system.
- *  This type provides type safety when dispatching standard algorithms on ranges resident
- *  in cuda memory.
- *
- *  \p pointer has pointer semantics: it may be dereferenced and manipulated with pointer arithmetic.
- *
- *  \p pointer can be created with the function \p cuda::malloc, or by explicitly calling its constructor
- *  with a raw pointer.
- *
- *  The raw pointer encapsulated by a \p pointer may be obtained by eiter its <tt>get</tt> member function
- *  or the \p raw_pointer_cast function.
- *
- *  \note \p pointer is not a "smart" pointer; it is the programmer's responsibility to deallocate memory
- *  pointed to by \p pointer.
- *
- *  \tparam T specifies the type of the pointee.
- *
- *  \see cuda::malloc
- *  \see cuda::free
- *  \see raw_pointer_cast
- */
-template<typename T>
-  class pointer
-    : public thrust::pointer<
-               T,
-               thrust::system::cuda::tag,
-               thrust::system::cuda::reference<T>,
-               thrust::system::cuda::pointer<T>
-             >
-{
-  /*! \cond
-   */
-
-  private:
-    typedef thrust::pointer<
-      T,
-      thrust::system::cuda::tag,
-      //thrust::system::cuda::reference<T>,
-      typename detail::reference_msvc_workaround<T>::type,
-      thrust::system::cuda::pointer<T>
-    > super_t;
-
-  /*! \endcond
-   */
-
-  public:
-
-    /*! \p pointer's no-argument constructor initializes its encapsulated pointer to \c 0.
-     */
-    __host__ __device__
-    pointer() : super_t() {}
-
-    /*! This constructor allows construction of a <tt>pointer<const T></tt> from a <tt>T*</tt>.
-     *
-     *  \param ptr A raw pointer to copy from, presumed to point to a location in memory
-     *         accessible by the \p tbb system.
-     *  \tparam OtherT \p OtherT shall be convertible to \p T.
-     */
-    template<typename OtherT>
-    __host__ __device__
-    explicit pointer(OtherT *ptr) : super_t(ptr) {}
-
-    /*! This constructor allows construction from another pointer-like object with related type.
-     *
-     *  \param other The \p OtherPointer to copy.
-     *  \tparam OtherPointer The system tag associated with \p OtherPointer shall be convertible
-     *          to \p thrust::system::cuda::tag and its element type shall be convertible to \p T.
-     */
-    template<typename OtherPointer>
-    __host__ __device__
-    pointer(const OtherPointer &other,
-            typename thrust::detail::enable_if_pointer_is_convertible<
-              OtherPointer,
-              pointer
-            >::type * = 0) : super_t(other) {}
-
-    /*! Assignment operator allows assigning from another pointer-like object with related type.
-     *
-     *  \param other The other pointer-like object to assign from.
-     *  \tparam OtherPointer The system tag associated with \p OtherPointer shall be convertible
-     *          to \p thrust::system::cuda::tag and its element type shall be convertible to \p T.
-     */
-    template<typename OtherPointer>
-    __host__ __device__
-    typename thrust::detail::enable_if_pointer_is_convertible<
-      OtherPointer,
-      pointer,
-      pointer &
-    >::type
-    operator=(const OtherPointer &other)
-    {
-      return super_t::operator=(other);
-    }
-}; // end pointer
-
-
-/*! \p reference is a wrapped reference to an object stored in memory available to the \p cuda system.
- *  \p reference is the type of the result of dereferencing a \p cuda::pointer.
- *
- *  \tparam T Specifies the type of the referenced object.
- */
-template<typename T>
-  class reference
-    : public thrust::reference<
-               T,
-               thrust::system::cuda::pointer<T>,
-               thrust::system::cuda::reference<T>
-             >
-{
-  /*! \cond
-   */
-
-  private:
-    typedef thrust::reference<
-      T,
-      thrust::system::cuda::pointer<T>,
-      thrust::system::cuda::reference<T>
-    > super_t;
-
-  /*! \endcond
-   */
-
-  public:
-    /*! \cond
-     */
-
-    typedef typename super_t::value_type value_type;
-    typedef typename super_t::pointer    pointer;
-
-    /*! \endcond
-     */
-
-    /*! This constructor initializes this \p reference to refer to an object
-     *  pointed to by the given \p pointer. After this \p reference is constructed,
-     *  it shall refer to the object pointed to by \p ptr.
-     *
-     *  \param ptr A \p pointer to copy from.
-     */
-    __host__ __device__
-    explicit reference(const pointer &ptr)
-      : super_t(ptr)
-    {}
-
-    /*! This constructor accepts a const reference to another \p reference of related type.
-     *  After this \p reference is constructed, it shall refer to the same object as \p other.
-     *
-     *  \param other A \p reference to copy from.
-     *  \tparam OtherT The element type of the other \p reference.
-     *
-     *  \note This constructor is templated primarily to allow initialization of <tt>reference<const T></tt>
-     *        from <tt>reference<T></tt>.
-     */
-    template<typename OtherT>
-    __host__ __device__
-    reference(const reference<OtherT> &other,
-              typename thrust::detail::enable_if_convertible<
-                typename reference<OtherT>::pointer,
-                pointer
-              >::type * = 0)
-      : super_t(other)
-    {}
-
-    /*! Copy assignment operator copy assigns from another \p reference of related type.
-     *
-     *  \param other The other \p reference to assign from.
-     *  \return <tt>*this</tt>
-     *  \tparam OtherT The element type of the other \p reference.
-     */
-    template<typename OtherT>
-    __host__ __device__
-    reference &operator=(const reference<OtherT> &other);
-
-    /*! Assignment operator assigns from a \p value_type.
-     *
-     *  \param x The \p value_type to assign from.
-     *  \return <tt>*this</tt>
-     */
-    __host__ __device__
-    reference &operator=(const value_type &x);
-}; // end reference
-
-/*! Exchanges the values of two objects referred to by \p reference.
- *  \p x The first \p reference of interest.
- *  \p y The second \p reference ot interest.
- */
-template<typename T>
-__host__ __device__
-void swap(reference<T> x, reference<T> y);
-
-/*! Allocates an area of memory available to Thrust's <tt>cuda</tt> system.
- *  \param n Number of bytes to allocate.
- *  \return A <tt>cuda::pointer<void></tt> pointing to the beginning of the newly
- *          allocated memory. A null <tt>cuda::pointer<void></tt> is returned if
- *          an error occurs.
- *  \note The <tt>cuda::pointer<void></tt> returned by this function must be
- *        deallocated with \p cuda::free.
- *  \see cuda::free
- *  \see std::malloc
- */
-inline pointer<void> malloc(std::size_t n);
-
-/*! Allocates a typed area of memory available to Thrust's <tt>cuda</tt> system.
- *  \param n Number of elements to allocate.
- *  \return A <tt>cuda::pointer<T></tt> pointing to the beginning of the newly
- *          allocated memory. A null <tt>cuda::pointer<T></tt> is returned if
- *          an error occurs.
- *  \note The <tt>cuda::pointer<T></tt> returned by this function must be
- *        deallocated with \p cuda::free.
- *  \see cuda::free
- *  \see std::malloc
- */
-template<typename T>
-inline pointer<T> malloc(std::size_t n);
-
-/*! Deallocates an area of memory previously allocated by <tt>cuda::malloc</tt>.
- *  \param ptr A <tt>cuda::pointer<void></tt> pointing to the beginning of an area
- *         of memory previously allocated with <tt>cuda::malloc</tt>.
- *  \see cuda::malloc
- *  \see std::free
- */
-inline void free(pointer<void> ptr);
-
-// XXX upon c++11
-// template<typename T> using allocator = thrust::detail::malloc_allocator<T,tag,pointer<T> >;
-
-/*! \p cuda::allocator is the default allocator used by the \p cuda system's containers such as
- *  <tt>cuda::vector</tt> if no user-specified allocator is provided. \p cuda::allocator allocates
- *  (deallocates) storage with \p cuda::malloc (\p cuda::free).
- */
-template<typename T>
-  struct allocator
-    : thrust::detail::malloc_allocator<
-        T,
-        tag,
-        pointer<T>
-      >
-{
-  /*! The \p rebind metafunction provides the type of an \p allocator
-   *  instantiated with another type.
-   *
-   *  \tparam U The other type to use for instantiation.
-   */
-  template<typename U>
-    struct rebind
-  {
-    /*! The typedef \p other gives the type of the rebound \p allocator.
-     */
-    typedef allocator<U> other;
-  };
-
-  /*! No-argument constructor has no effect.
-   */
-  __host__ __device__
-  inline allocator() {}
-
-  /*! Copy constructor has no effect.
-   */
-  __host__ __device__
-  inline allocator(const allocator &) {}
-
-  /*! Constructor from other \p allocator has no effect.
-   */
-  template<typename U>
-  __host__ __device__
-  inline allocator(const allocator<U> &) {}
-
-  /*! Destructor has no effect.
-   */
-  __host__ __device__
-  inline ~allocator() {}
-}; // end allocator
-
-} // end cuda
-
-/*! \}
- */
-
-} // end system
-
-/*! \namespace thrust::cuda
- *  \brief \p thrust::cuda is a top-level alias for thrust::system::cuda.
- */
-namespace cuda
-{
-
-using thrust::system::cuda::pointer;
-using thrust::system::cuda::reference;
-using thrust::system::cuda::malloc;
-using thrust::system::cuda::free;
-using thrust::system::cuda::allocator;
-
-} // end cuda
-
-} // end thrust
-
-#include <thrust/system/cuda/detail/memory.inl>
-
diff --git a/compat/thrust/system/cuda/vector.h b/compat/thrust/system/cuda/vector.h
deleted file mode 100644
index ac47a84e1d..0000000000
--- a/compat/thrust/system/cuda/vector.h
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in ccudaliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-/*! \file thrust/system/cuda/vector.h
- *  \brief A dynamically-sizable array of elements which reside in memory available to
- *         Thrust's CUDA system.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cuda/memory.h>
-#include <thrust/detail/vector_base.h>
-#include <vector>
-
-namespace thrust
-{
-
-// forward declaration of host_vector
-template<typename T, typename Allocator> class host_vector;
-
-namespace system
-{
-namespace cuda
-{
-
-// XXX upon c++11
-// template<typename T, typename Allocator = allocator<T> > using vector = thrust::detail::vector_base<T,Allocator>;
-
-/*! \p cuda::vector is a container that supports random access to elements,
- *  constant time removal of elements at the end, and linear time insertion
- *  and removal of elements at the beginning or in the middle. The number of
- *  elements in a \p cuda::vector may vary dynamically; memory management is
- *  automatic. The elements contained in a \p cuda::vector reside in memory
- *  available to the \p cuda system.
- *
- *  \tparam T The element type of the \p cuda::vector.
- *  \tparam Allocator The allocator type of the \p cuda::vector. Defaults to \p cuda::allocator.
- *
- *  \see http://www.sgi.com/tech/stl/Vector.html
- *  \see host_vector For the documentation of the complete interface which is
- *                   shared by \p cuda::vector
- *  \see device_vector
- */
-template<typename T, typename Allocator = allocator<T> >
-  class vector
-    : public thrust::detail::vector_base<T,Allocator>
-{
-  /*! \cond
-   */
-  private:
-    typedef thrust::detail::vector_base<T,Allocator> super_t;
-  /*! \endcond
-   */
-
-  public:
-
-  /*! \cond
-   */
-    typedef typename super_t::size_type  size_type;
-    typedef typename super_t::value_type value_type;
-  /*! \endcond
-   */
-
-    /*! This constructor creates an empty \p cuda::vector.
-     */
-    vector();
-
-    /*! This constructor creates a \p cuda::vector with \p n default-constructed elements.
-     *  \param n The size of the \p cuda::vector to create.
-     */
-    explicit vector(size_type n);
-
-    /*! This constructor creates a \p cuda::vector with \p n copies of \p value.
-     *  \param n The size of the \p cuda::vector to create.
-     *  \param value An element to copy.
-     */
-    explicit vector(size_type n, const value_type &value);
-
-    /*! Copy constructor copies from another \p cuda::vector.
-     *  \param x The other \p cuda::vector to copy.
-     */
-    vector(const vector &x);
-
-    /*! This constructor copies from another Thrust vector-like object.
-     *  \param x The other object to copy from.
-     */
-    template<typename OtherT, typename OtherAllocator>
-    vector(const thrust::detail::vector_base<OtherT,OtherAllocator> &x);
-
-    /*! This constructor copies from a \c std::vector.
-     *  \param x The \c std::vector to copy from.
-     */
-    template<typename OtherT, typename OtherAllocator>
-    vector(const std::vector<OtherT,OtherAllocator> &x);
-
-    /*! This constructor creates a \p cuda::vector by copying from a range.
-     *  \param first The beginning of the range.
-     *  \param last The end of the range.
-     */
-    template<typename InputIterator>
-    vector(InputIterator first, InputIterator last);
-
-    // XXX vector_base should take a Derived type so we don't have to define these superfluous assigns
-    //
-    /*! Assignment operator assigns from a \c std::vector.
-     *  \param x The \c std::vector to assign from.
-     *  \return <tt>*this</tt>
-     */
-    template<typename OtherT, typename OtherAllocator>
-    vector &operator=(const std::vector<OtherT,OtherAllocator> &x);
-
-    /*! Assignment operator assigns from another Thrust vector-like object.
-     *  \param x The other object to assign from.
-     *  \return <tt>*this</tt>
-     */
-    template<typename OtherT, typename OtherAllocator>
-    vector &operator=(const thrust::detail::vector_base<OtherT,OtherAllocator> &x);
-}; // end vector
-
-} // end cuda
-} // end system
-
-// alias system::cuda names at top-level
-namespace cuda
-{
-
-using thrust::system::cuda::vector;
-
-} // end cuda
-
-} // end thrust
-
-#include <thrust/system/cuda/detail/vector.inl>
-
diff --git a/compat/thrust/system/detail/adl/adjacent_difference.h b/compat/thrust/system/detail/adl/adjacent_difference.h
deleted file mode 100644
index 246c1163bb..0000000000
--- a/compat/thrust/system/detail/adl/adjacent_difference.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// the purpose of this header is to #include the adjacent_difference.h header
-// of the host and device systems. It should be #included in any
-// code which uses adl to dispatch adjacent_difference
-
-#define __THRUST_HOST_SYSTEM_ADJACENT_DIFFERENCE_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/adjacent_difference.h>
-#include __THRUST_HOST_SYSTEM_ADJACENT_DIFFERENCE_HEADER
-#undef __THRUST_HOST_SYSTEM_ADJACENT_DIFFERENCE_HEADER
-
-#define __THRUST_DEVICE_SYSTEM_ADJACENT_DIFFERENCE_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/adjacent_difference.h>
-#include __THRUST_DEVICE_SYSTEM_ADJACENT_DIFFERENCE_HEADER
-#undef __THRUST_DEVICE_SYSTEM_ADJACENT_DIFFERENCE_HEADER
-
diff --git a/compat/thrust/system/detail/adl/assign_value.h b/compat/thrust/system/detail/adl/assign_value.h
deleted file mode 100644
index b5c588ace7..0000000000
--- a/compat/thrust/system/detail/adl/assign_value.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// the purpose of this header is to #include the assign_value.h header
-// of the host and device systems. It should be #included in any
-// code which uses adl to dispatch assign_value
-
-#define __THRUST_HOST_SYSTEM_ASSIGN_VALUE_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/assign_value.h>
-#include __THRUST_HOST_SYSTEM_ASSIGN_VALUE_HEADER
-#undef __THRUST_HOST_SYSTEM_ASSIGN_VALUE_HEADER
-
-#define __THRUST_DEVICE_SYSTEM_ASSIGN_VALUE_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/assign_value.h>
-#include __THRUST_DEVICE_SYSTEM_ASSIGN_VALUE_HEADER
-#undef __THRUST_DEVICE_SYSTEM_ASSIGN_VALUE_HEADER
-
diff --git a/compat/thrust/system/detail/adl/binary_search.h b/compat/thrust/system/detail/adl/binary_search.h
deleted file mode 100644
index 7accfbc381..0000000000
--- a/compat/thrust/system/detail/adl/binary_search.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// the purpose of this header is to #include the binary_search.h header
-// of the host and device systems. It should be #included in any
-// code which uses adl to dispatch binary_search
-
-#define __THRUST_HOST_SYSTEM_BINARY_SEARCH_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/binary_search.h>
-#include __THRUST_HOST_SYSTEM_BINARY_SEARCH_HEADER
-#undef __THRUST_HOST_SYSTEM_BINARY_SEARCH_HEADER
-
-#define __THRUST_DEVICE_SYSTEM_BINARY_SEARCH_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/binary_search.h>
-#include __THRUST_DEVICE_SYSTEM_BINARY_SEARCH_HEADER
-#undef __THRUST_DEVICE_SYSTEM_BINARY_SEARCH_HEADER
-
diff --git a/compat/thrust/system/detail/adl/copy.h b/compat/thrust/system/detail/adl/copy.h
deleted file mode 100644
index 91a32cd345..0000000000
--- a/compat/thrust/system/detail/adl/copy.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// the purpose of this header is to #include the copy.h header
-// of the host and device systems. It should be #included in any
-// code which uses adl to dispatch copy
-
-#define __THRUST_HOST_SYSTEM_COPY_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/copy.h>
-#include __THRUST_HOST_SYSTEM_COPY_HEADER
-#undef __THRUST_HOST_SYSTEM_COPY_HEADER
-
-#define __THRUST_DEVICE_SYSTEM_COPY_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/copy.h>
-#include __THRUST_DEVICE_SYSTEM_COPY_HEADER
-#undef __THRUST_DEVICE_SYSTEM_COPY_HEADER
-
diff --git a/compat/thrust/system/detail/adl/copy_if.h b/compat/thrust/system/detail/adl/copy_if.h
deleted file mode 100644
index fd1df977ab..0000000000
--- a/compat/thrust/system/detail/adl/copy_if.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy_if.h of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// the purpose of this header is to #include the copy_if.h header
-// of the host and device systems. It should be #included in any
-// code which uses adl to dispatch copy_if
-
-#define __THRUST_HOST_SYSTEM_BINARY_SEARCH_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/copy_if.h>
-#include __THRUST_HOST_SYSTEM_BINARY_SEARCH_HEADER
-#undef __THRUST_HOST_SYSTEM_BINARY_SEARCH_HEADER
-
-#define __THRUST_DEVICE_SYSTEM_BINARY_SEARCH_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/copy_if.h>
-#include __THRUST_DEVICE_SYSTEM_BINARY_SEARCH_HEADER
-#undef __THRUST_DEVICE_SYSTEM_BINARY_SEARCH_HEADER
-
diff --git a/compat/thrust/system/detail/adl/count.h b/compat/thrust/system/detail/adl/count.h
deleted file mode 100644
index 0dd9591a23..0000000000
--- a/compat/thrust/system/detail/adl/count.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a count of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// the purpose of this header is to #include the count.h header
-// of the host and device systems. It should be #included in any
-// code which uses adl to dispatch count
-
-#define __THRUST_HOST_SYSTEM_COUNT_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/count.h>
-#include __THRUST_HOST_SYSTEM_COUNT_HEADER
-#undef __THRUST_HOST_SYSTEM_COUNT_HEADER
-
-#define __THRUST_DEVICE_SYSTEM_COUNT_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/count.h>
-#include __THRUST_DEVICE_SYSTEM_COUNT_HEADER
-#undef __THRUST_DEVICE_SYSTEM_COUNT_HEADER
-
diff --git a/compat/thrust/system/detail/adl/equal.h b/compat/thrust/system/detail/adl/equal.h
deleted file mode 100644
index f933d4f93a..0000000000
--- a/compat/thrust/system/detail/adl/equal.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a equal of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// the purpose of this header is to #include the equal.h header
-// of the host and device systems. It should be #included in any
-// code which uses adl to dispatch equal
-
-#define __THRUST_HOST_SYSTEM_EQUAL_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/equal.h>
-#include __THRUST_HOST_SYSTEM_EQUAL_HEADER
-#undef __THRUST_HOST_SYSTEM_EQUAL_HEADER
-
-#define __THRUST_DEVICE_SYSTEM_EQUAL_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/equal.h>
-#include __THRUST_DEVICE_SYSTEM_EQUAL_HEADER
-#undef __THRUST_DEVICE_SYSTEM_EQUAL_HEADER
-
diff --git a/compat/thrust/system/detail/adl/extrema.h b/compat/thrust/system/detail/adl/extrema.h
deleted file mode 100644
index c766570fc3..0000000000
--- a/compat/thrust/system/detail/adl/extrema.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a extrema of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// the purpose of this header is to #include the extrema.h header
-// of the host and device systems. It should be #included in any
-// code which uses adl to dispatch extrema
-
-#define __THRUST_HOST_SYSTEM_EXTREMA_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/extrema.h>
-#include __THRUST_HOST_SYSTEM_EXTREMA_HEADER
-#undef __THRUST_HOST_SYSTEM_EXTREMA_HEADER
-
-#define __THRUST_DEVICE_SYSTEM_EXTREMA_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/extrema.h>
-#include __THRUST_DEVICE_SYSTEM_EXTREMA_HEADER
-#undef __THRUST_DEVICE_SYSTEM_EXTREMA_HEADER
-
diff --git a/compat/thrust/system/detail/adl/fill.h b/compat/thrust/system/detail/adl/fill.h
deleted file mode 100644
index b241b8a3dc..0000000000
--- a/compat/thrust/system/detail/adl/fill.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a fill of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// the purpose of this header is to #include the fill.h header
-// of the host and device systems. It should be #included in any
-// code which uses adl to dispatch fill
-
-#define __THRUST_HOST_SYSTEM_FILL_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/fill.h>
-#include __THRUST_HOST_SYSTEM_FILL_HEADER
-#undef __THRUST_HOST_SYSTEM_FILL_HEADER
-
-#define __THRUST_DEVICE_SYSTEM_FILL_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/fill.h>
-#include __THRUST_DEVICE_SYSTEM_FILL_HEADER
-#undef __THRUST_DEVICE_SYSTEM_FILL_HEADER
-
diff --git a/compat/thrust/system/detail/adl/find.h b/compat/thrust/system/detail/adl/find.h
deleted file mode 100644
index 7c99f3e7b1..0000000000
--- a/compat/thrust/system/detail/adl/find.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a fill of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// the purpose of this header is to #include the find.h header
-// of the host and device systems. It should be #included in any
-// code which uses adl to dispatch find
-
-#define __THRUST_HOST_SYSTEM_FIND_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/find.h>
-#include __THRUST_HOST_SYSTEM_FIND_HEADER
-#undef __THRUST_HOST_SYSTEM_FIND_HEADER
-
-#define __THRUST_DEVICE_SYSTEM_FIND_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/find.h>
-#include __THRUST_DEVICE_SYSTEM_FIND_HEADER
-#undef __THRUST_DEVICE_SYSTEM_FIND_HEADER
-
diff --git a/compat/thrust/system/detail/adl/for_each.h b/compat/thrust/system/detail/adl/for_each.h
deleted file mode 100644
index 0b2717f34c..0000000000
--- a/compat/thrust/system/detail/adl/for_each.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// the purpose of this header is to #include the for_each.h header
-// of the host and device systems. It should be #included in any
-// code which uses adl to dispatch for_each
-
-#define __THRUST_HOST_SYSTEM_FOR_EACH_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/for_each.h>
-#include __THRUST_HOST_SYSTEM_FOR_EACH_HEADER
-#undef __THRUST_HOST_SYSTEM_FOR_EACH_HEADER
-
-#define __THRUST_DEVICE_SYSTEM_FOR_EACH_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/for_each.h>
-#include __THRUST_DEVICE_SYSTEM_FOR_EACH_HEADER
-#undef __THRUST_DEVICE_SYSTEM_FOR_EACH_HEADER
-
diff --git a/compat/thrust/system/detail/adl/gather.h b/compat/thrust/system/detail/adl/gather.h
deleted file mode 100644
index da4c1d13d5..0000000000
--- a/compat/thrust/system/detail/adl/gather.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a fill of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// the purpose of this header is to #include the gather.h header
-// of the host and device systems. It should be #included in any
-// code which uses adl to dispatch gather
-
-#define __THRUST_HOST_SYSTEM_FOR_EACH_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/gather.h>
-#include __THRUST_HOST_SYSTEM_FOR_EACH_HEADER
-#undef __THRUST_HOST_SYSTEM_FOR_EACH_HEADER
-
-#define __THRUST_DEVICE_SYSTEM_FOR_EACH_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/gather.h>
-#include __THRUST_DEVICE_SYSTEM_FOR_EACH_HEADER
-#undef __THRUST_DEVICE_SYSTEM_FOR_EACH_HEADER
-
diff --git a/compat/thrust/system/detail/adl/generate.h b/compat/thrust/system/detail/adl/generate.h
deleted file mode 100644
index 3a988478f0..0000000000
--- a/compat/thrust/system/detail/adl/generate.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a fill of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// the purpose of this header is to #include the generate.h header
-// of the host and device systems. It should be #included in any
-// code which uses adl to dispatch generate
-
-#define __THRUST_HOST_SYSTEM_GENERATE_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/generate.h>
-#include __THRUST_HOST_SYSTEM_GENERATE_HEADER
-#undef __THRUST_HOST_SYSTEM_GENERATE_HEADER
-
-#define __THRUST_DEVICE_SYSTEM_GENERATE_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/generate.h>
-#include __THRUST_DEVICE_SYSTEM_GENERATE_HEADER
-#undef __THRUST_DEVICE_SYSTEM_GENERATE_HEADER
-
diff --git a/compat/thrust/system/detail/adl/get_value.h b/compat/thrust/system/detail/adl/get_value.h
deleted file mode 100644
index ed4ef2cfef..0000000000
--- a/compat/thrust/system/detail/adl/get_value.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// the purpose of this header is to #include the get_value.h header
-// of the host and device systems. It should be #included in any
-// code which uses adl to dispatch get_value
-
-#define __THRUST_HOST_SYSTEM_GET_VALUE_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/get_value.h>
-#include __THRUST_HOST_SYSTEM_GET_VALUE_HEADER
-#undef __THRUST_HOST_SYSTEM_GET_VALUE_HEADER
-
-#define __THRUST_DEVICE_SYSTEM_GET_VALUE_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/get_value.h>
-#include __THRUST_DEVICE_SYSTEM_GET_VALUE_HEADER
-#undef __THRUST_DEVICE_SYSTEM_GET_VALUE_HEADER
-
diff --git a/compat/thrust/system/detail/adl/inner_product.h b/compat/thrust/system/detail/adl/inner_product.h
deleted file mode 100644
index 18cc65b9c1..0000000000
--- a/compat/thrust/system/detail/adl/inner_product.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a fill of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// the purpose of this header is to #include the inner_product.h header
-// of the host and device systems. It should be #included in any
-// code which uses adl to dispatch inner_product
-
-#define __THRUST_HOST_SYSTEM_INNER_PRODUCT_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/inner_product.h>
-#include __THRUST_HOST_SYSTEM_INNER_PRODUCT_HEADER
-#undef __THRUST_HOST_SYSTEM_INNER_PRODUCT_HEADER
-
-#define __THRUST_DEVICE_SYSTEM_INNER_PRODUCT_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/inner_product.h>
-#include __THRUST_DEVICE_SYSTEM_INNER_PRODUCT_HEADER
-#undef __THRUST_DEVICE_SYSTEM_INNER_PRODUCT_HEADER
-
diff --git a/compat/thrust/system/detail/adl/iter_swap.h b/compat/thrust/system/detail/adl/iter_swap.h
deleted file mode 100644
index b302c25f7f..0000000000
--- a/compat/thrust/system/detail/adl/iter_swap.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// the purpose of this header is to #include the iter_swap.h header
-// of the host and device systems. It should be #included in any
-// code which uses adl to dispatch iter_swap
-
-#define __THRUST_HOST_SYSTEM_ITER_SWAP_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/iter_swap.h>
-#include __THRUST_HOST_SYSTEM_ITER_SWAP_HEADER
-#undef __THRUST_HOST_SYSTEM_ITER_SWAP_HEADER
-
-#define __THRUST_DEVICE_SYSTEM_ITER_SWAP_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/iter_swap.h>
-#include __THRUST_DEVICE_SYSTEM_ITER_SWAP_HEADER
-#undef __THRUST_DEVICE_SYSTEM_ITER_SWAP_HEADER
-
diff --git a/compat/thrust/system/detail/adl/logical.h b/compat/thrust/system/detail/adl/logical.h
deleted file mode 100644
index 585f71af36..0000000000
--- a/compat/thrust/system/detail/adl/logical.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a fill of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// the purpose of this header is to #include the logical.h header
-// of the host and device systems. It should be #included in any
-// code which uses adl to dispatch logical
-
-#define __THRUST_HOST_SYSTEM_LOGICAL_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/logical.h>
-#include __THRUST_HOST_SYSTEM_LOGICAL_HEADER
-#undef __THRUST_HOST_SYSTEM_LOGICAL_HEADER
-
-#define __THRUST_DEVICE_SYSTEM_LOGICAL_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/logical.h>
-#include __THRUST_DEVICE_SYSTEM_LOGICAL_HEADER
-#undef __THRUST_DEVICE_SYSTEM_LOGICAL_HEADER
-
diff --git a/compat/thrust/system/detail/adl/malloc_and_free.h b/compat/thrust/system/detail/adl/malloc_and_free.h
deleted file mode 100644
index 7d99a260df..0000000000
--- a/compat/thrust/system/detail/adl/malloc_and_free.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// the purpose of this header is to #include the malloc_and_free.h header
-// of the host and device systems. It should be #included in any
-// code which uses adl to dispatch malloc_and_free
-
-#define __THRUST_HOST_SYSTEM_MALLOC_AND_FREE_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/malloc_and_free.h>
-#include __THRUST_HOST_SYSTEM_MALLOC_AND_FREE_HEADER
-#undef __THRUST_HOST_SYSTEM_MALLOC_AND_FREE_HEADER
-
-#define __THRUST_DEVICE_SYSTEM_MALLOC_AND_FREE_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/malloc_and_free.h>
-#include __THRUST_DEVICE_SYSTEM_MALLOC_AND_FREE_HEADER
-#undef __THRUST_DEVICE_SYSTEM_MALLOC_AND_FREE_HEADER
-
diff --git a/compat/thrust/system/detail/adl/merge.h b/compat/thrust/system/detail/adl/merge.h
deleted file mode 100644
index 59d8aceb49..0000000000
--- a/compat/thrust/system/detail/adl/merge.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a fill of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// the purpose of this header is to #include the merge.h header
-// of the host and device systems. It should be #included in any
-// code which uses adl to dispatch merge
-
-#define __THRUST_HOST_SYSTEM_MERGE_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/merge.h>
-#include __THRUST_HOST_SYSTEM_MERGE_HEADER
-#undef __THRUST_HOST_SYSTEM_MERGE_HEADER
-
-#define __THRUST_DEVICE_SYSTEM_MERGE_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/merge.h>
-#include __THRUST_DEVICE_SYSTEM_MERGE_HEADER
-#undef __THRUST_DEVICE_SYSTEM_MERGE_HEADER
-
diff --git a/compat/thrust/system/detail/adl/mismatch.h b/compat/thrust/system/detail/adl/mismatch.h
deleted file mode 100644
index d2d1831374..0000000000
--- a/compat/thrust/system/detail/adl/mismatch.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a fill of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// the purpose of this header is to #include the mismatch.h header
-// of the host and device systems. It should be #included in any
-// code which uses adl to dispatch mismatch
-
-#define __THRUST_HOST_SYSTEM_MISMATCH_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/mismatch.h>
-#include __THRUST_HOST_SYSTEM_MISMATCH_HEADER
-#undef __THRUST_HOST_SYSTEM_MISMATCH_HEADER
-
-#define __THRUST_DEVICE_SYSTEM_MISMATCH_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/mismatch.h>
-#include __THRUST_DEVICE_SYSTEM_MISMATCH_HEADER
-#undef __THRUST_DEVICE_SYSTEM_MISMATCH_HEADER
-
diff --git a/compat/thrust/system/detail/adl/partition.h b/compat/thrust/system/detail/adl/partition.h
deleted file mode 100644
index efdc60555b..0000000000
--- a/compat/thrust/system/detail/adl/partition.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a fill of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// the purpose of this header is to #include the partition.h header
-// of the host and device systems. It should be #included in any
-// code which uses adl to dispatch partition
-
-#define __THRUST_HOST_SYSTEM_PARTITION_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/partition.h>
-#include __THRUST_HOST_SYSTEM_PARTITION_HEADER
-#undef __THRUST_HOST_SYSTEM_PARTITION_HEADER
-
-#define __THRUST_DEVICE_SYSTEM_PARTITION_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/partition.h>
-#include __THRUST_DEVICE_SYSTEM_PARTITION_HEADER
-#undef __THRUST_DEVICE_SYSTEM_PARTITION_HEADER
-
diff --git a/compat/thrust/system/detail/adl/reduce.h b/compat/thrust/system/detail/adl/reduce.h
deleted file mode 100644
index afa00f9c60..0000000000
--- a/compat/thrust/system/detail/adl/reduce.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// the purpose of this header is to #include the reduce.h header
-// of the host and device systems. It should be #included in any
-// code which uses adl to dispatch reduce
-
-#define __THRUST_HOST_SYSTEM_REDUCE_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/reduce.h>
-#include __THRUST_HOST_SYSTEM_REDUCE_HEADER
-#undef __THRUST_HOST_SYSTEM_REDUCE_HEADER
-
-#define __THRUST_DEVICE_SYSTEM_REDUCE_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/reduce.h>
-#include __THRUST_DEVICE_SYSTEM_REDUCE_HEADER
-#undef __THRUST_DEVICE_SYSTEM_REDUCE_HEADER
-
diff --git a/compat/thrust/system/detail/adl/reduce_by_key.h b/compat/thrust/system/detail/adl/reduce_by_key.h
deleted file mode 100644
index eac65b72d2..0000000000
--- a/compat/thrust/system/detail/adl/reduce_by_key.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// the purpose of this header is to #include the reduce_by_key.h header
-// of the host and device systems. It should be #included in any
-// code which uses adl to dispatch reduce_by_key
-
-#define __THRUST_HOST_SYSTEM_REDUCE_BY_KEY_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/reduce_by_key.h>
-#include __THRUST_HOST_SYSTEM_REDUCE_BY_KEY_HEADER
-#undef __THRUST_HOST_SYSTEM_REDUCE_BY_KEY_HEADER
-
-#define __THRUST_DEVICE_SYSTEM_REDUCE_BY_KEY_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/reduce_by_key.h>
-#include __THRUST_DEVICE_SYSTEM_REDUCE_BY_KEY_HEADER
-#undef __THRUST_DEVICE_SYSTEM_REDUCE_BY_KEY_HEADER
-
diff --git a/compat/thrust/system/detail/adl/remove.h b/compat/thrust/system/detail/adl/remove.h
deleted file mode 100644
index 9d64be8da3..0000000000
--- a/compat/thrust/system/detail/adl/remove.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// the purpose of this header is to #include the remove.h header
-// of the host and device systems. It should be #included in any
-// code which uses adl to dispatch remove
-
-#define __THRUST_HOST_SYSTEM_REMOVE_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/remove.h>
-#include __THRUST_HOST_SYSTEM_REMOVE_HEADER
-#undef __THRUST_HOST_SYSTEM_REMOVE_HEADER
-
-#define __THRUST_DEVICE_SYSTEM_REMOVE_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/remove.h>
-#include __THRUST_DEVICE_SYSTEM_REMOVE_HEADER
-#undef __THRUST_DEVICE_SYSTEM_REMOVE_HEADER
-
diff --git a/compat/thrust/system/detail/adl/replace.h b/compat/thrust/system/detail/adl/replace.h
deleted file mode 100644
index e4d8bd22ac..0000000000
--- a/compat/thrust/system/detail/adl/replace.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// the purpose of this header is to #include the replace.h header
-// of the host and device systems. It should be #included in any
-// code which uses adl to dispatch replace
-
-#define __THRUST_HOST_SYSTEM_REPLACE_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/replace.h>
-#include __THRUST_HOST_SYSTEM_REPLACE_HEADER
-#undef __THRUST_HOST_SYSTEM_REPLACE_HEADER
-
-#define __THRUST_DEVICE_SYSTEM_REPLACE_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/replace.h>
-#include __THRUST_DEVICE_SYSTEM_REPLACE_HEADER
-#undef __THRUST_DEVICE_SYSTEM_REPLACE_HEADER
-
diff --git a/compat/thrust/system/detail/adl/reverse.h b/compat/thrust/system/detail/adl/reverse.h
deleted file mode 100644
index 8cbcfd833c..0000000000
--- a/compat/thrust/system/detail/adl/reverse.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// the purpose of this header is to #include the reverse.h header
-// of the host and device systems. It should be #included in any
-// code which uses adl to dispatch reverse
-
-#define __THRUST_HOST_SYSTEM_REVERSE_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/reverse.h>
-#include __THRUST_HOST_SYSTEM_REVERSE_HEADER
-#undef __THRUST_HOST_SYSTEM_REVERSE_HEADER
-
-#define __THRUST_DEVICE_SYSTEM_REVERSE_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/reverse.h>
-#include __THRUST_DEVICE_SYSTEM_REVERSE_HEADER
-#undef __THRUST_DEVICE_SYSTEM_REVERSE_HEADER
-
diff --git a/compat/thrust/system/detail/adl/scan.h b/compat/thrust/system/detail/adl/scan.h
deleted file mode 100644
index e70cd9fdda..0000000000
--- a/compat/thrust/system/detail/adl/scan.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// the purpose of this header is to #include the scan.h header
-// of the host and device systems. It should be #included in any
-// code which uses adl to dispatch scan
-
-#define __THRUST_HOST_SYSTEM_SCAN_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/scan.h>
-#include __THRUST_HOST_SYSTEM_SCAN_HEADER
-#undef __THRUST_HOST_SYSTEM_SCAN_HEADER
-
-#define __THRUST_DEVICE_SYSTEM_SCAN_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/scan.h>
-#include __THRUST_DEVICE_SYSTEM_SCAN_HEADER
-#undef __THRUST_DEVICE_SYSTEM_SCAN_HEADER
-
diff --git a/compat/thrust/system/detail/adl/scan_by_key.h b/compat/thrust/system/detail/adl/scan_by_key.h
deleted file mode 100644
index 02c4b84751..0000000000
--- a/compat/thrust/system/detail/adl/scan_by_key.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// the purpose of this header is to #include the scan_by_key.h header
-// of the host and device systems. It should be #included in any
-// code which uses adl to dispatch scan_by_key
-
-#define __THRUST_HOST_SYSTEM_SCAN_BY_KEY_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/scan_by_key.h>
-#include __THRUST_HOST_SYSTEM_SCAN_BY_KEY_HEADER
-#undef __THRUST_HOST_SYSTEM_SCAN_BY_KEY_HEADER
-
-#define __THRUST_DEVICE_SYSTEM_SCAN_BY_KEY_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/scan_by_key.h>
-#include __THRUST_DEVICE_SYSTEM_SCAN_BY_KEY_HEADER
-#undef __THRUST_DEVICE_SYSTEM_SCAN_BY_KEY_HEADER
-
diff --git a/compat/thrust/system/detail/adl/scatter.h b/compat/thrust/system/detail/adl/scatter.h
deleted file mode 100644
index b94b0d9892..0000000000
--- a/compat/thrust/system/detail/adl/scatter.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// the purpose of this header is to #include the scatter.h header
-// of the host and device systems. It should be #included in any
-// code which uses adl to dispatch scatter
-
-#define __THRUST_HOST_SYSTEM_SCATTER_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/scatter.h>
-#include __THRUST_HOST_SYSTEM_SCATTER_HEADER
-#undef __THRUST_HOST_SYSTEM_SCATTER_HEADER
-
-#define __THRUST_DEVICE_SYSTEM_SCATTER_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/scatter.h>
-#include __THRUST_DEVICE_SYSTEM_SCATTER_HEADER
-#undef __THRUST_DEVICE_SYSTEM_SCATTER_HEADER
-
diff --git a/compat/thrust/system/detail/adl/sequence.h b/compat/thrust/system/detail/adl/sequence.h
deleted file mode 100644
index 07dcc7b7c1..0000000000
--- a/compat/thrust/system/detail/adl/sequence.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// the purpose of this header is to #include the sequence.h header
-// of the host and device systems. It should be #included in any
-// code which uses adl to dispatch sequence
-
-#define __THRUST_HOST_SYSTEM_SEQUENCE_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/sequence.h>
-#include __THRUST_HOST_SYSTEM_SEQUENCE_HEADER
-#undef __THRUST_HOST_SYSTEM_SEQUENCE_HEADER
-
-#define __THRUST_DEVICE_SYSTEM_SEQUENCE_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/sequence.h>
-#include __THRUST_DEVICE_SYSTEM_SEQUENCE_HEADER
-#undef __THRUST_DEVICE_SYSTEM_SEQUENCE_HEADER
-
diff --git a/compat/thrust/system/detail/adl/set_operations.h b/compat/thrust/system/detail/adl/set_operations.h
deleted file mode 100644
index 9901b46851..0000000000
--- a/compat/thrust/system/detail/adl/set_operations.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// the purpose of this header is to #include the set_operations.h header
-// of the host and device systems. It should be #included in any
-// code which uses adl to dispatch set_operations
-
-#define __THRUST_HOST_SYSTEM_SET_OPERATIONS_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/set_operations.h>
-#include __THRUST_HOST_SYSTEM_SET_OPERATIONS_HEADER
-#undef __THRUST_HOST_SYSTEM_SET_OPERATIONS_HEADER
-
-#define __THRUST_DEVICE_SYSTEM_SET_OPERATIONS_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/set_operations.h>
-#include __THRUST_DEVICE_SYSTEM_SET_OPERATIONS_HEADER
-#undef __THRUST_DEVICE_SYSTEM_SET_OPERATIONS_HEADER
-
diff --git a/compat/thrust/system/detail/adl/sort.h b/compat/thrust/system/detail/adl/sort.h
deleted file mode 100644
index afcb903f87..0000000000
--- a/compat/thrust/system/detail/adl/sort.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// the purpose of this header is to #include the sort.h header
-// of the host and device systems. It should be #included in any
-// code which uses adl to dispatch sort
-
-#define __THRUST_HOST_SYSTEM_SORT_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/sort.h>
-#include __THRUST_HOST_SYSTEM_SORT_HEADER
-#undef __THRUST_HOST_SYSTEM_SORT_HEADER
-
-#define __THRUST_DEVICE_SYSTEM_SORT_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/sort.h>
-#include __THRUST_DEVICE_SYSTEM_SORT_HEADER
-#undef __THRUST_DEVICE_SYSTEM_SORT_HEADER
-
diff --git a/compat/thrust/system/detail/adl/swap_ranges.h b/compat/thrust/system/detail/adl/swap_ranges.h
deleted file mode 100644
index c0069369e0..0000000000
--- a/compat/thrust/system/detail/adl/swap_ranges.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// the purpose of this header is to #include the swap_ranges.h header
-// of the host and device systems. It should be #included in any
-// code which uses adl to dispatch swap_ranges
-
-#define __THRUST_HOST_SYSTEM_SWAP_RANGES_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/swap_ranges.h>
-#include __THRUST_HOST_SYSTEM_SWAP_RANGES_HEADER
-#undef __THRUST_HOST_SYSTEM_SWAP_RANGES_HEADER
-
-#define __THRUST_DEVICE_SYSTEM_SWAP_RANGES_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/swap_ranges.h>
-#include __THRUST_DEVICE_SYSTEM_SWAP_RANGES_HEADER
-#undef __THRUST_DEVICE_SYSTEM_SWAP_RANGES_HEADER
-
diff --git a/compat/thrust/system/detail/adl/tabulate.h b/compat/thrust/system/detail/adl/tabulate.h
deleted file mode 100644
index cb1fdebd11..0000000000
--- a/compat/thrust/system/detail/adl/tabulate.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// the purpose of this header is to #include the tabulate.h header
-// of the host and device systems. It should be #included in any
-// code which uses adl to dispatch tabulate
-
-#define __THRUST_HOST_SYSTEM_TABULATE_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/tabulate.h>
-#include __THRUST_HOST_SYSTEM_TABULATE_HEADER
-#undef __THRUST_HOST_SYSTEM_TABULATE_HEADER
-
-#define __THRUST_DEVICE_SYSTEM_TABULATE_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/tabulate.h>
-#include __THRUST_DEVICE_SYSTEM_TABULATE_HEADER
-#undef __THRUST_DEVICE_SYSTEM_TABULATE_HEADER
-
diff --git a/compat/thrust/system/detail/adl/temporary_buffer.h b/compat/thrust/system/detail/adl/temporary_buffer.h
deleted file mode 100644
index 66df0ea85b..0000000000
--- a/compat/thrust/system/detail/adl/temporary_buffer.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// the purpose of this header is to #include the temporary_buffer.h header
-// of the host and device systems. It should be #included in any
-// code which uses adl to dispatch get_temporary_buffer or return_temporary_buffer
-
-#define __THRUST_HOST_SYSTEM_TEMPORARY_BUFFER_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/temporary_buffer.h>
-#include __THRUST_HOST_SYSTEM_TEMPORARY_BUFFER_HEADER
-#undef __THRUST_HOST_SYSTEM_TEMPORARY_BUFFER_HEADER
-
-#define __THRUST_DEVICE_SYSTEM_TEMPORARY_BUFFER_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/temporary_buffer.h>
-#include __THRUST_DEVICE_SYSTEM_TEMPORARY_BUFFER_HEADER
-#undef __THRUST_DEVICE_SYSTEM_TEMPORARY_BUFFER_HEADER
-
diff --git a/compat/thrust/system/detail/adl/transform.h b/compat/thrust/system/detail/adl/transform.h
deleted file mode 100644
index c9e6a01ea1..0000000000
--- a/compat/thrust/system/detail/adl/transform.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// the purpose of this header is to #include the transform.h header
-// of the host and device systems. It should be #included in any
-// code which uses adl to dispatch transform
-
-#define __THRUST_HOST_SYSTEM_TRANSFORM_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/transform.h>
-#include __THRUST_HOST_SYSTEM_TRANSFORM_HEADER
-#undef __THRUST_HOST_SYSTEM_TRANSFORM_HEADER
-
-#define __THRUST_DEVICE_SYSTEM_TRANSFORM_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/transform.h>
-#include __THRUST_DEVICE_SYSTEM_TRANSFORM_HEADER
-#undef __THRUST_DEVICE_SYSTEM_TRANSFORM_HEADER
-
diff --git a/compat/thrust/system/detail/adl/transform_reduce.h b/compat/thrust/system/detail/adl/transform_reduce.h
deleted file mode 100644
index 0a5d97749f..0000000000
--- a/compat/thrust/system/detail/adl/transform_reduce.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// the purpose of this header is to #include the transform_reduce.h header
-// of the host and device systems. It should be #included in any
-// code which uses adl to dispatch transform_reduce
-
-#define __THRUST_HOST_SYSTEM_TRANSFORM_REDUCE_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/transform_reduce.h>
-#include __THRUST_HOST_SYSTEM_TRANSFORM_REDUCE_HEADER
-#undef __THRUST_HOST_SYSTEM_TRANSFORM_REDUCE_HEADER
-
-#define __THRUST_DEVICE_SYSTEM_TRANSFORM_REDUCE_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/transform_reduce.h>
-#include __THRUST_DEVICE_SYSTEM_TRANSFORM_REDUCE_HEADER
-#undef __THRUST_DEVICE_SYSTEM_TRANSFORM_REDUCE_HEADER
-
diff --git a/compat/thrust/system/detail/adl/transform_scan.h b/compat/thrust/system/detail/adl/transform_scan.h
deleted file mode 100644
index 47c1dc3ae9..0000000000
--- a/compat/thrust/system/detail/adl/transform_scan.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// the purpose of this header is to #include the transform_scan.h header
-// of the host and device systems. It should be #included in any
-// code which uses adl to dispatch transform_scan
-
-#define __THRUST_HOST_SYSTEM_TRANSFORM_SCAN_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/transform_scan.h>
-#include __THRUST_HOST_SYSTEM_TRANSFORM_SCAN_HEADER
-#undef __THRUST_HOST_SYSTEM_TRANSFORM_SCAN_HEADER
-
-#define __THRUST_DEVICE_SYSTEM_TRANSFORM_SCAN_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/transform_scan.h>
-#include __THRUST_DEVICE_SYSTEM_TRANSFORM_SCAN_HEADER
-#undef __THRUST_DEVICE_SYSTEM_TRANSFORM_SCAN_HEADER
-
diff --git a/compat/thrust/system/detail/adl/uninitialized_copy.h b/compat/thrust/system/detail/adl/uninitialized_copy.h
deleted file mode 100644
index 7cb0b8e401..0000000000
--- a/compat/thrust/system/detail/adl/uninitialized_copy.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a fill of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// the purpose of this header is to #include the uninitialized_copy.h header
-// of the host and device systems. It should be #included in any
-// code which uses adl to dispatch uninitialized_copy
-
-#define __THRUST_HOST_SYSTEM_UNINITIALIZED_COPY_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/uninitialized_copy.h>
-#include __THRUST_HOST_SYSTEM_UNINITIALIZED_COPY_HEADER
-#undef __THRUST_HOST_SYSTEM_UNINITIALIZED_COPY_HEADER
-
-#define __THRUST_DEVICE_SYSTEM_UNINITIALIZED_COPY_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/uninitialized_copy.h>
-#include __THRUST_DEVICE_SYSTEM_UNINITIALIZED_COPY_HEADER
-#undef __THRUST_DEVICE_SYSTEM_UNINITIALIZED_COPY_HEADER
-
diff --git a/compat/thrust/system/detail/adl/uninitialized_fill.h b/compat/thrust/system/detail/adl/uninitialized_fill.h
deleted file mode 100644
index 9f00b51622..0000000000
--- a/compat/thrust/system/detail/adl/uninitialized_fill.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// the purpose of this header is to #include the uninitialized_fill.h header
-// of the host and device systems. It should be #included in any
-// code which uses adl to dispatch uninitialized_fill
-
-#define __THRUST_HOST_SYSTEM_UNINITIALIZED_FILL_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/uninitialized_fill.h>
-#include __THRUST_HOST_SYSTEM_UNINITIALIZED_FILL_HEADER
-#undef __THRUST_HOST_SYSTEM_UNINITIALIZED_FILL_HEADER
-
-#define __THRUST_DEVICE_SYSTEM_UNINITIALIZED_FILL_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/uninitialized_fill.h>
-#include __THRUST_DEVICE_SYSTEM_UNINITIALIZED_FILL_HEADER
-#undef __THRUST_DEVICE_SYSTEM_UNINITIALIZED_FILL_HEADER
-
diff --git a/compat/thrust/system/detail/adl/unique.h b/compat/thrust/system/detail/adl/unique.h
deleted file mode 100644
index 932ff58e07..0000000000
--- a/compat/thrust/system/detail/adl/unique.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// the purpose of this header is to #include the unique.h header
-// of the host and device systems. It should be #included in any
-// code which uses adl to dispatch unique
-
-#define __THRUST_HOST_SYSTEM_UNIQUE_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/unique.h>
-#include __THRUST_HOST_SYSTEM_UNIQUE_HEADER
-#undef __THRUST_HOST_SYSTEM_UNIQUE_HEADER
-
-#define __THRUST_DEVICE_SYSTEM_UNIQUE_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/unique.h>
-#include __THRUST_DEVICE_SYSTEM_UNIQUE_HEADER
-#undef __THRUST_DEVICE_SYSTEM_UNIQUE_HEADER
-
diff --git a/compat/thrust/system/detail/adl/unique_by_key.h b/compat/thrust/system/detail/adl/unique_by_key.h
deleted file mode 100644
index 30e6f2f2d1..0000000000
--- a/compat/thrust/system/detail/adl/unique_by_key.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// the purpose of this header is to #include the unique_by_key.h header
-// of the host and device systems. It should be #included in any
-// code which uses adl to dispatch unique_by_key
-
-#define __THRUST_HOST_SYSTEM_UNIQUE_BY_KEY_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/unique_by_key.h>
-#include __THRUST_HOST_SYSTEM_UNIQUE_BY_KEY_HEADER
-#undef __THRUST_HOST_SYSTEM_UNIQUE_BY_KEY_HEADER
-
-#define __THRUST_DEVICE_SYSTEM_UNIQUE_BY_KEY_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/unique_by_key.h>
-#include __THRUST_DEVICE_SYSTEM_UNIQUE_BY_KEY_HEADER
-#undef __THRUST_DEVICE_SYSTEM_UNIQUE_BY_KEY_HEADER
-
diff --git a/compat/thrust/system/detail/bad_alloc.h b/compat/thrust/system/detail/bad_alloc.h
deleted file mode 100644
index bb73d1f006..0000000000
--- a/compat/thrust/system/detail/bad_alloc.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#pragma once
-
-#include <new>
-#include <string>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-
-// define our own bad_alloc so we can set its .what(): a std::bad_alloc whose message carries caller-supplied detail
-class bad_alloc
-  : public std::bad_alloc // derives publicly, so handlers for std::bad_alloc also catch this type
-{
-  public:
-    inline bad_alloc(const std::string &w) // w: detail text appended to the base class message
-      : std::bad_alloc(), m_what()
-    {
-      m_what = std::bad_alloc::what(); // start from the base class's own description
-      m_what += ": ";
-      m_what += w; // final message is "<std::bad_alloc::what()>: <w>"
-    } // end bad_alloc()
-
-    inline virtual ~bad_alloc(void) throw () {}; // destructor declared non-throwing; body is empty
-
-    inline virtual const char *what(void) const throw() // returns the message assembled by the constructor
-    {
-      return m_what.c_str(); // pointer refers to m_what's buffer, so it is only usable while this object is alive
-    } // end what()
-
-  private:
-    std::string m_what; // full message text handed out by what()
-}; // end bad_alloc
-  
-} // end detail
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/detail/errno.h b/compat/thrust/system/detail/errno.h
deleted file mode 100644
index 34bc8cc568..0000000000
--- a/compat/thrust/system/detail/errno.h
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// The rationale for the existence of these apparently redundant definitions is
-// to provide them portably and to avoid bringing in system headers which might
-// pollute the global namespace. These identifiers are in lowercase to avoid
-// colliding with the real macros in errno.h.
-
-namespace thrust
-{
-
-namespace system
-{
-
-namespace detail
-{
-
-static const int eafnosupport    = 9901;
-static const int eaddrinuse      = 9902;
-static const int eaddrnotavail   = 9903;
-static const int eisconn         = 9904;
-static const int ebadmsg         = 9905;
-static const int econnaborted    = 9906;
-static const int ealready        = 9907;
-static const int econnrefused    = 9908;
-static const int econnreset      = 9909;
-static const int edestaddrreq    = 9910;
-static const int ehostunreach    = 9911;
-static const int eidrm           = 9912;
-static const int emsgsize        = 9913;
-static const int enetdown        = 9914;
-static const int enetreset       = 9915;
-static const int enetunreach     = 9916;
-static const int enobufs         = 9917;
-static const int enolink         = 9918;
-static const int enodata         = 9919;
-static const int enomsg          = 9920;
-static const int enoprotoopt     = 9921;
-static const int enosr           = 9922;
-static const int enotsock        = 9923;
-static const int enostr          = 9924;
-static const int enotconn        = 9925;
-static const int enotsup         = 9926;
-static const int ecanceled       = 9927;
-static const int einprogress     = 9928;
-static const int eopnotsupp      = 9929;
-static const int ewouldblock     = 9930;
-static const int eownerdead      = 9931;
-static const int eproto          = 9932;
-static const int eprotonosupport = 9933;
-static const int enotrecoverable = 9934;
-static const int etime           = 9935;
-static const int etxtbsy         = 9936;
-static const int etimedout       = 9938;
-static const int eloop           = 9939;
-static const int eoverflow       = 9940;
-static const int eprototype      = 9941;
-static const int enosys          = 9942;
-static const int einval          = 9943;
-static const int erange          = 9944;
-static const int eilseq          = 9945;
-static const int e2big           = 9946;
-static const int edom            = 9947;
-static const int efault          = 9948;
-static const int ebadf           = 9949;
-static const int epipe           = 9950;
-static const int exdev           = 9951;
-static const int ebusy           = 9952;
-static const int enotempty       = 9953;
-static const int enoexec         = 9954;
-static const int eexist          = 9955;
-static const int efbig           = 9956;
-static const int enametoolong    = 9957;
-static const int enotty          = 9958;
-static const int eintr           = 9959;
-static const int espipe          = 9960;
-static const int eio             = 9961;
-static const int eisdir          = 9962;
-static const int echild          = 9963;
-static const int enolck          = 9964;
-static const int enospc          = 9965;
-static const int enxio           = 9966;
-static const int enodev          = 9967;
-static const int enoent          = 9968;
-static const int esrch           = 9969;
-static const int enotdir         = 9970;
-static const int enomem          = 9971;
-static const int eperm           = 9972;
-static const int eacces          = 9973;
-static const int erofs           = 9974;
-static const int edeadlk         = 9975;
-static const int eagain          = 9976;
-static const int enfile          = 9977;
-static const int emfile          = 9978;
-static const int emlink          = 9979;
-
-} // end detail
-
-} // end system
-
-} // end thrust
-
diff --git a/compat/thrust/system/detail/error_category.inl b/compat/thrust/system/detail/error_category.inl
deleted file mode 100644
index 8e19c89db5..0000000000
--- a/compat/thrust/system/detail/error_category.inl
+++ /dev/null
@@ -1,234 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#pragma once
-
-#include <thrust/system/error_code.h>
-#include <thrust/system/detail/errno.h>
-#include <thrust/functional.h>
-#include <cstring>
-
-namespace thrust
-{
-
-namespace system
-{
-
-error_category
-  ::~error_category(void)
-{
-  ;
-} // end error_category::~error_category()
-
-
-error_condition error_category
-  ::default_error_condition(int ev) const
-{
-  return error_condition(ev, *this);
-} // end error_category::default_error_condition()
-
-
-bool error_category
-  ::equivalent(int code, const error_condition &condition) const
-{
-  return default_error_condition(code) == condition;
-} // end error_condition::equivalent()
-
-
-bool error_category
-  ::equivalent(const error_code &code, int condition) const
-{
-  bool result = (this->operator==(code.category())) && (code.value() == condition);
-  return result;
-} // end error_code::equivalent()
-
-
-bool error_category
-  ::operator==(const error_category &rhs) const
-{
-  return this == &rhs;
-} // end error_category::operator==()
-
-
-bool error_category
-  ::operator!=(const error_category &rhs) const
-{
-  return !this->operator==(rhs);
-} // end error_category::operator!=()
-
-
-bool error_category
-  ::operator<(const error_category &rhs) const
-{
-  return thrust::less<const error_category*>()(this,&rhs);
-} // end error_category::operator<()
-
-
-namespace detail
-{
-
-
-class generic_error_category
-  : public error_category
-{
-  public:
-    inline generic_error_category(void) {}
-
-    inline virtual const char *name(void) const
-    {
-      return "generic";
-    }
-
-    inline virtual std::string message(int ev) const
-    {
-      static const std::string unknown_err("Unknown error");
-
-      // XXX strerror is not thread-safe:
-      //     prefer strerror_r (which is not provided on windows)
-      const char *c_str = std::strerror(ev);
-      return c_str ? std::string(c_str) : unknown_err;
-    }
-}; // end generic_category_result
-
-
-class system_error_category
-  : public error_category
-{
-  public:
-    inline system_error_category(void) {}
-
-    inline virtual const char *name(void) const
-    {
-      return "system";
-    }
-
-    inline virtual std::string message(int ev) const
-    {
-      return generic_category().message(ev);
-    }
-
-    inline virtual error_condition default_error_condition(int ev) const
-    {
-      using namespace errc;
-
-      switch(ev)
-      {
-        case eafnosupport:    return make_error_condition(address_family_not_supported);
-        case eaddrinuse:      return make_error_condition(address_in_use);
-        case eaddrnotavail:   return make_error_condition(address_not_available);
-        case eisconn:         return make_error_condition(already_connected);
-        case e2big:           return make_error_condition(argument_list_too_long);
-        case edom:            return make_error_condition(argument_out_of_domain);
-        case efault:          return make_error_condition(bad_address);
-        case ebadf:           return make_error_condition(bad_file_descriptor);
-        case ebadmsg:         return make_error_condition(bad_message);
-        case epipe:           return make_error_condition(broken_pipe);
-        case econnaborted:    return make_error_condition(connection_aborted);
-        case ealready:        return make_error_condition(connection_already_in_progress);
-        case econnrefused:    return make_error_condition(connection_refused);
-        case econnreset:      return make_error_condition(connection_reset);
-        case exdev:           return make_error_condition(cross_device_link);
-        case edestaddrreq:    return make_error_condition(destination_address_required);
-        case ebusy:           return make_error_condition(device_or_resource_busy);
-        case enotempty:       return make_error_condition(directory_not_empty);
-        case enoexec:         return make_error_condition(executable_format_error);
-        case eexist:          return make_error_condition(file_exists);
-        case efbig:           return make_error_condition(file_too_large);
-        case enametoolong:    return make_error_condition(filename_too_long);
-        case enosys:          return make_error_condition(function_not_supported);
-        case ehostunreach:    return make_error_condition(host_unreachable);
-        case eidrm:           return make_error_condition(identifier_removed);
-        case eilseq:          return make_error_condition(illegal_byte_sequence);
-        case enotty:          return make_error_condition(inappropriate_io_control_operation);
-        case eintr:           return make_error_condition(interrupted);
-        case einval:          return make_error_condition(invalid_argument);
-        case espipe:          return make_error_condition(invalid_seek);
-        case eio:             return make_error_condition(io_error);
-        case eisdir:          return make_error_condition(is_a_directory);
-        case emsgsize:        return make_error_condition(message_size);
-        case enetdown:        return make_error_condition(network_down);
-        case enetreset:       return make_error_condition(network_reset);
-        case enetunreach:     return make_error_condition(network_unreachable);
-        case enobufs:         return make_error_condition(no_buffer_space);
-        case echild:          return make_error_condition(no_child_process);
-        case enolink:         return make_error_condition(no_link);
-        case enolck:          return make_error_condition(no_lock_available);
-        case enodata:         return make_error_condition(no_message_available);
-        case enomsg:          return make_error_condition(no_message);
-        case enoprotoopt:     return make_error_condition(no_protocol_option);
-        case enospc:          return make_error_condition(no_space_on_device);
-        case enosr:           return make_error_condition(no_stream_resources);
-        case enxio:           return make_error_condition(no_such_device_or_address);
-        case enodev:          return make_error_condition(no_such_device);
-        case enoent:          return make_error_condition(no_such_file_or_directory);
-        case esrch:           return make_error_condition(no_such_process);
-        case enotdir:         return make_error_condition(not_a_directory);
-        case enotsock:        return make_error_condition(not_a_socket);
-        case enostr:          return make_error_condition(not_a_stream);
-        case enotconn:        return make_error_condition(not_connected);
-        case enomem:          return make_error_condition(not_enough_memory);
-        case enotsup:         return make_error_condition(not_supported);
-        case ecanceled:       return make_error_condition(operation_canceled);
-        case einprogress:     return make_error_condition(operation_in_progress);
-        case eperm:           return make_error_condition(operation_not_permitted);
-        case eopnotsupp:      return make_error_condition(operation_not_supported);
-        case ewouldblock:     return make_error_condition(operation_would_block);
-        case eownerdead:      return make_error_condition(owner_dead);
-        case eacces:          return make_error_condition(permission_denied);
-        case eproto:          return make_error_condition(protocol_error);
-        case eprotonosupport: return make_error_condition(protocol_not_supported);
-        case erofs:           return make_error_condition(read_only_file_system);
-        case edeadlk:         return make_error_condition(resource_deadlock_would_occur);
-        case eagain:          return make_error_condition(resource_unavailable_try_again);
-        case erange:          return make_error_condition(result_out_of_range);
-        case enotrecoverable: return make_error_condition(state_not_recoverable);
-        case etime:           return make_error_condition(stream_timeout);
-        case etxtbsy:         return make_error_condition(text_file_busy);
-        case etimedout:       return make_error_condition(timed_out);
-        case enfile:          return make_error_condition(too_many_files_open_in_system);
-        case emfile:          return make_error_condition(too_many_files_open);
-        case emlink:          return make_error_condition(too_many_links);
-        case eloop:           return make_error_condition(too_many_symbolic_link_levels);
-        case eoverflow:       return make_error_condition(value_too_large);
-        case eprototype:      return make_error_condition(wrong_protocol_type);
-        default:              return error_condition(ev,system_category());
-      }
-    }
-}; // end system_category_result
-
-
-} // end detail
-
-
-const error_category &generic_category(void)
-{
-  static const detail::generic_error_category result;
-  return result;
-}
-
-
-const error_category &system_category(void)
-{
-  static const detail::system_error_category result;
-  return result;
-}
-
-
-} // end system
-
-} // end thrust
-
diff --git a/compat/thrust/system/detail/error_code.inl b/compat/thrust/system/detail/error_code.inl
deleted file mode 100644
index 0cf86b4821..0000000000
--- a/compat/thrust/system/detail/error_code.inl
+++ /dev/null
@@ -1,197 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#pragma once
-
-#include <thrust/system/error_code.h>
-
-namespace thrust
-{
-
-namespace system
-{
-
-error_code
-  ::error_code(void)
-    :m_val(0),m_cat(&system_category())
-{
-  ;
-} // end error_code::error_code()
-
-
-error_code
-  ::error_code(int val, const error_category &cat)
-    :m_val(val),m_cat(&cat)
-{
-  ;
-} // end error_code::error_code()
-
-
-template <typename ErrorCodeEnum>
-  error_code
-    ::error_code(ErrorCodeEnum e
-// XXX WAR msvc's problem with enable_if
-#if THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC
-                 , typename thrust::detail::enable_if<is_error_code_enum<ErrorCodeEnum>::value>::type *
-#endif // THRUST_HOST_COMPILER_MSVC
-                )
-{
-  *this = make_error_code(e);
-} // end error_code::error_code()
-
-
-void error_code
-  ::assign(int val, const error_category &cat)
-{
-  m_val = val;
-  m_cat = &cat;
-} // end error_code::assign()
-
-
-template <typename ErrorCodeEnum>
-// XXX WAR msvc's problem with enable_if
-#if THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC
-  typename thrust::detail::enable_if<is_error_code_enum<ErrorCodeEnum>::value, error_code>::type &
-#else
-  error_code &
-#endif // THRUST_HOST_COMPILER_MSVC
-    error_code
-      ::operator=(ErrorCodeEnum e)
-{
-  *this = make_error_code(e);
-  return *this;
-} // end error_code::operator=()
-
-
-void error_code
-  ::clear(void)
-{
-  m_val = 0;
-  m_cat = &system_category();
-} // end error_code::clear()
-
-
-int error_code
-  ::value(void) const
-{
-  return m_val;
-} // end error_code::value()
-
-
-const error_category &error_code
-  ::category(void) const
-{
-  return *m_cat;
-} // end error_code::category()
-
-
-error_condition error_code
-  ::default_error_condition(void) const
-{
-  return category().default_error_condition(value());
-} // end error_code::default_error_condition()
-
-
-std::string error_code
-  ::message(void) const
-{
-  return category().message(value());
-} // end error_code::message()
-
-
-error_code
-  ::operator bool (void) const
-{
-  return value() != 0;
-} // end error_code::operator bool ()
-
-
-error_code make_error_code(errc::errc_t e)
-{
-  return error_code(static_cast<int>(e), generic_category());
-} // end make_error_code()
-
-
-bool operator<(const error_code &lhs, const error_code &rhs)
-{
-  bool result = lhs.category().operator<(rhs.category());
-  result = result || lhs.category().operator==(rhs.category());
-  result = result || lhs.value() < rhs.value();
-  return result;
-} // end operator==()
-
-
-template<typename charT, typename traits>
-  std::basic_ostream<charT,traits>&
-    operator<<(std::basic_ostream<charT,traits> &os, const error_code &ec)
-{
-  return os << ec.category().name() << ':' << ec.value();
-} // end operator<<()
-
-
-bool operator==(const error_code &lhs, const error_code &rhs)
-{
-  return lhs.category().operator==(rhs.category()) && lhs.value() == rhs.value();
-} // end operator==()
-
-
-bool operator==(const error_code &lhs, const error_condition &rhs)
-{
-  return lhs.category().equivalent(lhs.value(), rhs) || rhs.category().equivalent(lhs,rhs.value());
-} // end operator==()
-
-
-bool operator==(const error_condition &lhs, const error_code &rhs)
-{
-  return rhs.category().equivalent(lhs.value(), lhs) || lhs.category().equivalent(rhs, lhs.value());
-} // end operator==()
-
-
-bool operator==(const error_condition &lhs, const error_condition &rhs)
-{
-  return lhs.category().operator==(rhs.category()) && lhs.value() == rhs.value();
-} // end operator==()
-
-
-bool operator!=(const error_code &lhs, const error_code &rhs)
-{
-  return !(lhs == rhs);
-} // end operator!=()
-
-
-bool operator!=(const error_code &lhs, const error_condition &rhs)
-{
-  return !(lhs == rhs);
-} // end operator!=()
-
-
-bool operator!=(const error_condition &lhs, const error_code &rhs)
-{
-  return !(lhs == rhs);
-} // end operator!=()
-
-
-bool operator!=(const error_condition &lhs, const error_condition &rhs)
-{
-  return !(lhs == rhs);
-} // end operator!=()
-
-
-} // end system
-
-} // end thrust
-
diff --git a/compat/thrust/system/detail/error_condition.inl b/compat/thrust/system/detail/error_condition.inl
deleted file mode 100644
index 00fbaf091d..0000000000
--- a/compat/thrust/system/detail/error_condition.inl
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#pragma once
-
-#include <thrust/system/detail/error_condition.inl>
-#include <thrust/functional.h>
-
-namespace thrust
-{
-
-namespace system
-{
-
-error_condition
-  ::error_condition(void)
-    :m_val(0),m_cat(&generic_category())
-{
-  ;
-} // end error_condition::error_condition()
-
-
-error_condition
-  ::error_condition(int val, const error_category &cat)
-    :m_val(val),m_cat(&cat)
-{
-  ;
-} // end error_condition::error_condition()
-
-
-template<typename ErrorConditionEnum>
-  error_condition
-    ::error_condition(ErrorConditionEnum e
-// XXX WAR msvc's problem with enable_if
-#if THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC
-                      , typename thrust::detail::enable_if<is_error_condition_enum<ErrorConditionEnum>::value>::type *
-#endif // THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC
-                     )
-{
-  *this = make_error_condition(e);
-} // end error_condition::error_condition()
-
-
-void error_condition
-  ::assign(int val, const error_category &cat)
-{
-  m_val = val;
-  m_cat = &cat;
-} // end error_category::assign()
-
-
-template<typename ErrorConditionEnum>
-// XXX WAR msvc's problem with enable_if
-#if THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC
-  typename thrust::detail::enable_if<is_error_condition_enum<ErrorConditionEnum>::value, error_condition>::type &
-#else
-  error_condition &
-#endif // THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC
-    error_condition
-      ::operator=(ErrorConditionEnum e)
-{
-  *this = make_error_condition(e);
-  return *this;
-} // end error_condition::operator=()
-
-
-void error_condition
-  ::clear(void)
-{
-  m_val = 0;
-  m_cat = &generic_category();
-} // end error_condition::clear()
-
-
-int error_condition
-  ::value(void) const
-{
-  return m_val;
-} // end error_condition::value()
-
-
-const error_category &error_condition
-  ::category(void) const
-{
-  return *m_cat;
-} // end error_condition::category()
-
-
-std::string error_condition
-  ::message(void) const
-{
-  return category().message(value());
-} // end error_condition::message()
-
-
-error_condition
-  ::operator bool (void) const
-{
-  return value() != 0;
-} // end error_condition::operator bool ()
-
-
-error_condition make_error_condition(errc::errc_t e)
-{
-  return error_condition(static_cast<int>(e), generic_category());
-} // end make_error_condition()
-
-
-bool operator<(const error_condition &lhs,
-               const error_condition &rhs)
-{
-  return lhs.category().operator<(rhs.category()) || (lhs.category().operator==(rhs.category()) && (lhs.value() < rhs.value()));
-} // end operator<()
-
-
-} // end system
-
-} // end thrust
-
diff --git a/compat/thrust/system/detail/generic/adjacent_difference.h b/compat/thrust/system/detail/generic/adjacent_difference.h
deleted file mode 100644
index bb340df490..0000000000
--- a/compat/thrust/system/detail/generic/adjacent_difference.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file adjacent_difference.h
- *  \brief Generic implementation of adjacent_difference.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/tag.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-template <typename DerivedPolicy, class InputIterator, class OutputIterator>
-OutputIterator adjacent_difference(thrust::execution_policy<DerivedPolicy> &exec,
-                                   InputIterator first, InputIterator last,
-                                   OutputIterator result);
-
-template <typename DerivedPolicy, class InputIterator, class OutputIterator, class BinaryFunction>
-OutputIterator adjacent_difference(thrust::execution_policy<DerivedPolicy> &exec,
-                                   InputIterator first, InputIterator last,
-                                   OutputIterator result,
-                                   BinaryFunction binary_op);
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/detail/generic/adjacent_difference.inl>
-
diff --git a/compat/thrust/system/detail/generic/adjacent_difference.inl b/compat/thrust/system/detail/generic/adjacent_difference.inl
deleted file mode 100644
index 619b29f860..0000000000
--- a/compat/thrust/system/detail/generic/adjacent_difference.inl
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/adjacent_difference.h>
-#include <thrust/adjacent_difference.h>
-#include <thrust/functional.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/detail/temporary_array.h>
-#include <thrust/transform.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-template <typename DerivedPolicy, class InputIterator, class OutputIterator>
-OutputIterator adjacent_difference(thrust::execution_policy<DerivedPolicy> &exec,
-                                   InputIterator first, InputIterator last,
-                                   OutputIterator result)
-{
-  typedef typename thrust::iterator_traits<InputIterator>::value_type InputType;
-  thrust::minus<InputType> binary_op;
-
-  return thrust::adjacent_difference(exec, first, last, result, binary_op);
-} // end adjacent_difference()
-
-template <typename DerivedPolicy, class InputIterator, class OutputIterator, class BinaryFunction>
-OutputIterator adjacent_difference(thrust::execution_policy<DerivedPolicy> &exec,
-                                   InputIterator first, InputIterator last,
-                                   OutputIterator result,
-                                   BinaryFunction binary_op)
-{
-  typedef typename thrust::iterator_traits<InputIterator>::value_type InputType;
-
-  if(first == last)
-  {
-    // empty range, nothing to do
-    return result; 
-  }
-  else 
-  {
-    // an in-place operation is requested, copy the input and call the entry point
-    // XXX a special-purpose kernel would be faster here since
-    // only block boundaries need to be copied
-    thrust::detail::temporary_array<InputType, DerivedPolicy> input_copy(exec, first, last);
-    
-    *result = *first;
-    thrust::transform(exec, input_copy.begin() + 1, input_copy.end(), input_copy.begin(), result + 1, binary_op); 
-  }
-
-  return result + (last - first);
-}
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/generic/advance.h b/compat/thrust/system/detail/generic/advance.h
deleted file mode 100644
index 249aac7e54..0000000000
--- a/compat/thrust/system/detail/generic/advance.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-template<typename InputIterator, typename Distance>
-void advance(InputIterator& i, Distance n);
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/detail/generic/advance.inl>
-
diff --git a/compat/thrust/system/detail/generic/advance.inl b/compat/thrust/system/detail/generic/advance.inl
deleted file mode 100644
index b95737ad9e..0000000000
--- a/compat/thrust/system/detail/generic/advance.inl
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/advance.h>
-#include <thrust/iterator/iterator_traits.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-namespace detail
-{
-
-template<typename InputIterator, typename Distance>
-void advance(InputIterator& i, Distance n, thrust::incrementable_traversal_tag)
-{
-  while(n)
-  {
-    ++i;
-    --n;
-  } // end while
-} // end advance()
-
-template<typename InputIterator, typename Distance>
-void advance(InputIterator& i, Distance n, thrust::random_access_traversal_tag)
-{
-  i += n;
-} // end advance()
-
-} // end detail
-
-template<typename InputIterator, typename Distance>
-void advance(InputIterator& i, Distance n)
-{
-  // dispatch on iterator traversal
-  thrust::system::detail::generic::detail::advance(i, n,
-    typename thrust::iterator_traversal<InputIterator>::type());
-} // end advance()
-
-} // end namespace detail
-} // end namespace generic
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/generic/binary_search.h b/compat/thrust/system/detail/generic/binary_search.h
deleted file mode 100644
index 7fd6c506ee..0000000000
--- a/compat/thrust/system/detail/generic/binary_search.h
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file binary_search.h
- *  \brief Generic implementations of binary search functions.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/tag.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-
-template <typename DerivedPolicy, typename ForwardIterator, typename T>
-ForwardIterator lower_bound(thrust::execution_policy<DerivedPolicy> &exec, 
-                            ForwardIterator begin,
-                            ForwardIterator end,
-                            const T& value);
-
-template <typename DerivedPolicy, typename ForwardIterator, typename T, typename StrictWeakOrdering>
-ForwardIterator lower_bound(thrust::execution_policy<DerivedPolicy> &exec,
-                            ForwardIterator begin,
-                            ForwardIterator end,
-                            const T& value, 
-                            StrictWeakOrdering comp);
-
-
-template <typename DerivedPolicy, typename ForwardIterator, typename T>
-ForwardIterator upper_bound(thrust::execution_policy<DerivedPolicy> &exec,
-                            ForwardIterator begin,
-                            ForwardIterator end,
-                            const T& value);
-
-template <typename DerivedPolicy, typename ForwardIterator, typename T, typename StrictWeakOrdering>
-ForwardIterator upper_bound(thrust::execution_policy<DerivedPolicy> &exec, 
-                            ForwardIterator begin,
-                            ForwardIterator end,
-                            const T& value, 
-                            StrictWeakOrdering comp);
-
-
-template <typename DerivedPolicy, typename ForwardIterator, typename T>
-bool binary_search(thrust::execution_policy<DerivedPolicy> &exec,
-                   ForwardIterator begin,
-                   ForwardIterator end,
-                   const T& value);
-
-template <typename DerivedPolicy, typename ForwardIterator, typename T, typename StrictWeakOrdering>
-bool binary_search(thrust::execution_policy<DerivedPolicy> &exec,
-                   ForwardIterator begin,
-                   ForwardIterator end,
-                   const T& value, 
-                   StrictWeakOrdering comp);
-
-
-template <typename DerivedPolicy, typename ForwardIterator, typename InputIterator, typename OutputIterator>
-OutputIterator lower_bound(thrust::execution_policy<DerivedPolicy> &exec,
-                           ForwardIterator begin, 
-                           ForwardIterator end,
-                           InputIterator values_begin, 
-                           InputIterator values_end,
-                           OutputIterator output);
-
-template <typename DerivedPolicy, typename ForwardIterator, typename InputIterator, typename OutputIterator, typename StrictWeakOrdering>
-OutputIterator lower_bound(thrust::execution_policy<DerivedPolicy> &exec,
-                           ForwardIterator begin, 
-                           ForwardIterator end,
-                           InputIterator values_begin, 
-                           InputIterator values_end,
-                           OutputIterator output,
-                           StrictWeakOrdering comp);
-
-
-template <typename DerivedPolicy, typename ForwardIterator, typename InputIterator, typename OutputIterator>
-OutputIterator upper_bound(thrust::execution_policy<DerivedPolicy> &exec,
-                           ForwardIterator begin, 
-                           ForwardIterator end,
-                           InputIterator values_begin, 
-                           InputIterator values_end,
-                           OutputIterator output);
-
-template <typename DerivedPolicy, typename ForwardIterator, typename InputIterator, typename OutputIterator, typename StrictWeakOrdering>
-OutputIterator upper_bound(thrust::execution_policy<DerivedPolicy> &exec,
-                           ForwardIterator begin, 
-                           ForwardIterator end,
-                           InputIterator values_begin, 
-                           InputIterator values_end,
-                           OutputIterator output,
-                           StrictWeakOrdering comp);
-
-
-template <typename DerivedPolicy, typename ForwardIterator, typename InputIterator, typename OutputIterator>
-OutputIterator binary_search(thrust::execution_policy<DerivedPolicy> &exec,
-                             ForwardIterator begin, 
-                             ForwardIterator end,
-                             InputIterator values_begin, 
-                             InputIterator values_end,
-                             OutputIterator output);
-
-template <typename DerivedPolicy, typename ForwardIterator, typename InputIterator, typename OutputIterator, typename StrictWeakOrdering>
-OutputIterator binary_search(thrust::execution_policy<DerivedPolicy> &exec,
-                             ForwardIterator begin, 
-                             ForwardIterator end,
-                             InputIterator values_begin, 
-                             InputIterator values_end,
-                             OutputIterator output,
-                             StrictWeakOrdering comp);
-
-
-template <typename DerivedPolicy, typename ForwardIterator, typename LessThanComparable>
-thrust::pair<ForwardIterator,ForwardIterator>
-equal_range(thrust::execution_policy<DerivedPolicy> &exec,
-            ForwardIterator first,
-            ForwardIterator last,
-            const LessThanComparable &value);
-
-template <typename DerivedPolicy, typename ForwardIterator, typename LessThanComparable, typename StrictWeakOrdering>
-thrust::pair<ForwardIterator,ForwardIterator>
-equal_range(thrust::execution_policy<DerivedPolicy> &exec,
-            ForwardIterator first,
-            ForwardIterator last,
-            const LessThanComparable &value,
-            StrictWeakOrdering comp);
-
-
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/detail/generic/binary_search.inl>
-
diff --git a/compat/thrust/system/detail/generic/binary_search.inl b/compat/thrust/system/detail/generic/binary_search.inl
deleted file mode 100644
index 151ac0ea35..0000000000
--- a/compat/thrust/system/detail/generic/binary_search.inl
+++ /dev/null
@@ -1,342 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file binary_search.inl
- *  \brief Inline file for binary_search.h
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/distance.h>
-#include <thrust/functional.h>
-#include <thrust/binary_search.h>
-#include <thrust/iterator/zip_iterator.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/binary_search.h>
-
-#include <thrust/for_each.h>
-#include <thrust/detail/function.h>
-#include <thrust/system/detail/generic/scalar/binary_search.h>
-
-#include <thrust/detail/temporary_array.h>
-#include <thrust/detail/type_traits.h>
-
-namespace thrust
-{
-namespace detail
-{
-
-// XXX WAR circular #inclusion with this forward declaration
-template<typename,typename> class temporary_array;
-
-} // end detail
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-namespace detail
-{
-
-
-// short names to avoid nvcc bug
-struct lbf
-{
-    template <typename RandomAccessIterator, typename T, typename StrictWeakOrdering>
-    __host__ __device__
-    typename thrust::iterator_traits<RandomAccessIterator>::difference_type
-    operator()(RandomAccessIterator begin, RandomAccessIterator end, const T& value, StrictWeakOrdering comp)
-    {
-        return thrust::system::detail::generic::scalar::lower_bound(begin, end, value, comp) - begin;
-    }
-};
-
-struct ubf
-{
-    template <typename RandomAccessIterator, typename T, typename StrictWeakOrdering>
-        __host__ __device__
-        typename thrust::iterator_traits<RandomAccessIterator>::difference_type
-     operator()(RandomAccessIterator begin, RandomAccessIterator end, const T& value, StrictWeakOrdering comp){
-         return thrust::system::detail::generic::scalar::upper_bound(begin, end, value, comp) - begin;
-     }
-};
-
-struct bsf
-{
-    template <typename RandomAccessIterator, typename T, typename StrictWeakOrdering>
-        __host__ __device__
-     bool operator()(RandomAccessIterator begin, RandomAccessIterator end, const T& value, StrictWeakOrdering comp){
-         RandomAccessIterator iter = thrust::system::detail::generic::scalar::lower_bound(begin, end, value, comp);
-
-         thrust::detail::host_device_function<StrictWeakOrdering,bool> wrapped_comp(comp);
-
-         return iter != end && !wrapped_comp(value, *iter);
-     }
-};
-
-
-template <typename ForwardIterator, typename StrictWeakOrdering, typename BinarySearchFunction>
-struct binary_search_functor
-{
-    ForwardIterator begin;
-    ForwardIterator end;
-    StrictWeakOrdering comp;
-    BinarySearchFunction func;
-
-    binary_search_functor(ForwardIterator begin, ForwardIterator end, StrictWeakOrdering comp, BinarySearchFunction func)
-        : begin(begin), end(end), comp(comp), func(func) {}
-
-    template <typename Tuple>
-        __host__ __device__
-        void operator()(Tuple t)
-        {
-            thrust::get<1>(t) = func(begin, end, thrust::get<0>(t), comp);
-        }
-}; // binary_search_functor
-
-
-// Vector Implementation
-template <typename DerivedPolicy, typename ForwardIterator, typename InputIterator, typename OutputIterator, typename StrictWeakOrdering, typename BinarySearchFunction>
-OutputIterator binary_search(thrust::execution_policy<DerivedPolicy> &exec,
-                             ForwardIterator begin, 
-                             ForwardIterator end,
-                             InputIterator values_begin, 
-                             InputIterator values_end,
-                             OutputIterator output,
-                             StrictWeakOrdering comp,
-                             BinarySearchFunction func)
-{
-    thrust::for_each(exec,
-                     thrust::make_zip_iterator(thrust::make_tuple(values_begin, output)),
-                     thrust::make_zip_iterator(thrust::make_tuple(values_end, output + thrust::distance(values_begin, values_end))),
-                     detail::binary_search_functor<ForwardIterator, StrictWeakOrdering, BinarySearchFunction>(begin, end, comp, func));
-
-    return output + thrust::distance(values_begin, values_end);
-}
-
-   
-
-// Scalar Implementation
-template <typename OutputType, typename DerivedPolicy, typename ForwardIterator, typename T, typename StrictWeakOrdering, typename BinarySearchFunction>
-OutputType binary_search(thrust::execution_policy<DerivedPolicy> &exec,
-                         ForwardIterator begin,
-                         ForwardIterator end,
-                         const T& value, 
-                         StrictWeakOrdering comp,
-                         BinarySearchFunction func)
-{
-    // use the vectorized path to implement the scalar version
-
-    // allocate device buffers for value and output
-    thrust::detail::temporary_array<T,DerivedPolicy>          d_value(exec,1);
-    thrust::detail::temporary_array<OutputType,DerivedPolicy> d_output(exec,1);
-
-    // copy value to device
-    d_value[0] = value;
-
-    // perform the query
-    thrust::system::detail::generic::detail::binary_search(exec, begin, end, d_value.begin(), d_value.end(), d_output.begin(), comp, func);
-
-    // copy result to host and return
-    return d_output[0];
-}
-   
-} // end namespace detail
-
-
-//////////////////////
-// Scalar Functions //
-//////////////////////
-
-template <typename DerivedPolicy, typename ForwardIterator, typename T>
-ForwardIterator lower_bound(thrust::execution_policy<DerivedPolicy> &exec,
-                            ForwardIterator begin,
-                            ForwardIterator end,
-                            const T& value)
-{
-  return thrust::lower_bound(exec, begin, end, value, thrust::less<T>());
-}
-
-template <typename DerivedPolicy, typename ForwardIterator, typename T, typename StrictWeakOrdering>
-ForwardIterator lower_bound(thrust::execution_policy<DerivedPolicy> &exec,
-                            ForwardIterator begin,
-                            ForwardIterator end,
-                            const T& value, 
-                            StrictWeakOrdering comp)
-{
-  typedef typename thrust::iterator_traits<ForwardIterator>::difference_type difference_type;
-  
-  return begin + detail::binary_search<difference_type>(exec, begin, end, value, comp, detail::lbf());
-}
-
-
-template <typename DerivedPolicy, typename ForwardIterator, typename T>
-ForwardIterator upper_bound(thrust::execution_policy<DerivedPolicy> &exec,
-                            ForwardIterator begin,
-                            ForwardIterator end,
-                            const T& value)
-{
-  return thrust::upper_bound(exec, begin, end, value, thrust::less<T>());
-}
-
-template <typename DerivedPolicy, typename ForwardIterator, typename T, typename StrictWeakOrdering>
-ForwardIterator upper_bound(thrust::execution_policy<DerivedPolicy> &exec,
-                            ForwardIterator begin,
-                            ForwardIterator end,
-                            const T& value, 
-                            StrictWeakOrdering comp)
-{
-  typedef typename thrust::iterator_traits<ForwardIterator>::difference_type difference_type;
-  
-  return begin + detail::binary_search<difference_type>(exec, begin, end, value, comp, detail::ubf());
-}
-
-
-template <typename DerivedPolicy, typename ForwardIterator, typename T>
-bool binary_search(thrust::execution_policy<DerivedPolicy> &exec,
-                   ForwardIterator begin,
-                   ForwardIterator end,
-                   const T& value)
-{
-  return thrust::binary_search(exec, begin, end, value, thrust::less<T>());
-}
-
-template <typename DerivedPolicy, typename ForwardIterator, typename T, typename StrictWeakOrdering>
-bool binary_search(thrust::execution_policy<DerivedPolicy> &exec,
-                   ForwardIterator begin,
-                   ForwardIterator end,
-                   const T& value, 
-                   StrictWeakOrdering comp)
-{
-  return detail::binary_search<bool>(exec, begin, end, value, comp, detail::bsf());
-}
-
-
-//////////////////////
-// Vector Functions //
-//////////////////////
-
-template <typename DerivedPolicy, typename ForwardIterator, typename InputIterator, typename OutputIterator>
-OutputIterator lower_bound(thrust::execution_policy<DerivedPolicy> &exec,
-                           ForwardIterator begin, 
-                           ForwardIterator end,
-                           InputIterator values_begin, 
-                           InputIterator values_end,
-                           OutputIterator output)
-{
-  typedef typename thrust::iterator_value<InputIterator>::type ValueType;
-
-  return thrust::lower_bound(exec, begin, end, values_begin, values_end, output, thrust::less<ValueType>());
-}
-
-template <typename DerivedPolicy, typename ForwardIterator, typename InputIterator, typename OutputIterator, typename StrictWeakOrdering>
-OutputIterator lower_bound(thrust::execution_policy<DerivedPolicy> &exec,
-                           ForwardIterator begin, 
-                           ForwardIterator end,
-                           InputIterator values_begin, 
-                           InputIterator values_end,
-                           OutputIterator output,
-                           StrictWeakOrdering comp)
-{
-  return detail::binary_search(exec, begin, end, values_begin, values_end, output, comp, detail::lbf());
-}
-
-
-template <typename DerivedPolicy, typename ForwardIterator, typename InputIterator, typename OutputIterator>
-OutputIterator upper_bound(thrust::execution_policy<DerivedPolicy> &exec,
-                           ForwardIterator begin, 
-                           ForwardIterator end,
-                           InputIterator values_begin, 
-                           InputIterator values_end,
-                           OutputIterator output)
-{
-  typedef typename thrust::iterator_value<InputIterator>::type ValueType;
-
-  return thrust::upper_bound(exec, begin, end, values_begin, values_end, output, thrust::less<ValueType>());
-}
-
-template <typename DerivedPolicy, typename ForwardIterator, typename InputIterator, typename OutputIterator, typename StrictWeakOrdering>
-OutputIterator upper_bound(thrust::execution_policy<DerivedPolicy> &exec,
-                           ForwardIterator begin, 
-                           ForwardIterator end,
-                           InputIterator values_begin, 
-                           InputIterator values_end,
-                           OutputIterator output,
-                           StrictWeakOrdering comp)
-{
-  return detail::binary_search(exec, begin, end, values_begin, values_end, output, comp, detail::ubf());
-}
-
-
-template <typename DerivedPolicy, typename ForwardIterator, typename InputIterator, typename OutputIterator>
-OutputIterator binary_search(thrust::execution_policy<DerivedPolicy> &exec,
-                             ForwardIterator begin, 
-                             ForwardIterator end,
-                             InputIterator values_begin, 
-                             InputIterator values_end,
-                             OutputIterator output)
-{
-  typedef typename thrust::iterator_value<InputIterator>::type ValueType;
-
-  return thrust::binary_search(exec, begin, end, values_begin, values_end, output, thrust::less<ValueType>());
-}
-
-template <typename DerivedPolicy, typename ForwardIterator, typename InputIterator, typename OutputIterator, typename StrictWeakOrdering>
-OutputIterator binary_search(thrust::execution_policy<DerivedPolicy> &exec,
-                             ForwardIterator begin, 
-                             ForwardIterator end,
-                             InputIterator values_begin, 
-                             InputIterator values_end,
-                             OutputIterator output,
-                             StrictWeakOrdering comp)
-{
-  return detail::binary_search(exec, begin, end, values_begin, values_end, output, comp, detail::bsf());
-}
-
-
-template <typename DerivedPolicy, typename ForwardIterator, typename LessThanComparable>
-thrust::pair<ForwardIterator,ForwardIterator>
-equal_range(thrust::execution_policy<DerivedPolicy> &exec,
-            ForwardIterator first,
-            ForwardIterator last,
-            const LessThanComparable &value)
-{
-  return thrust::equal_range(exec, first, last, value, thrust::less<LessThanComparable>());
-}
-
-
-template <typename DerivedPolicy, typename ForwardIterator, typename T, typename StrictWeakOrdering>
-thrust::pair<ForwardIterator,ForwardIterator>
-equal_range(thrust::execution_policy<DerivedPolicy> &exec,
-            ForwardIterator first,
-            ForwardIterator last,
-            const T &value,
-            StrictWeakOrdering comp)
-{
-  ForwardIterator lb = thrust::lower_bound(exec, first, last, value, comp);
-  ForwardIterator ub = thrust::upper_bound(exec, first, last, value, comp);
-  return thrust::make_pair(lb, ub);
-}
-
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/generic/copy.h b/compat/thrust/system/detail/generic/copy.h
deleted file mode 100644
index 8df98fe67f..0000000000
--- a/compat/thrust/system/detail/generic/copy.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/tag.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator>
-  OutputIterator copy(thrust::execution_policy<DerivedPolicy> &exec,
-                      InputIterator  first,
-                      InputIterator  last,
-                      OutputIterator result);
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename Size,
-         typename OutputIterator>
-  OutputIterator copy_n(thrust::execution_policy<DerivedPolicy> &exec,
-                        InputIterator  first,
-                        Size           n,
-                        OutputIterator result);
-
-
-} // end generic
-} // end detail
-} // end system
-} // end thrust
-
-#include <thrust/system/detail/generic/copy.inl>
-
diff --git a/compat/thrust/system/detail/generic/copy.inl b/compat/thrust/system/detail/generic/copy.inl
deleted file mode 100644
index e081015f8e..0000000000
--- a/compat/thrust/system/detail/generic/copy.inl
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/copy.h>
-#include <thrust/functional.h>
-#include <thrust/detail/internal_functional.h>
-#include <thrust/transform.h>
-#include <thrust/for_each.h>
-#include <thrust/tuple.h>
-#include <thrust/iterator/zip_iterator.h>
-#include <thrust/iterator/detail/minimum_system.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator>
-  OutputIterator copy(thrust::execution_policy<DerivedPolicy> &exec,
-                      InputIterator                            first,
-                      InputIterator                            last,
-                      OutputIterator                           result)
-{
-  typedef typename thrust::iterator_value<InputIterator>::type T;
-  return thrust::transform(exec, first, last, result, thrust::identity<T>());
-} // end copy()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename Size,
-         typename OutputIterator>
-  OutputIterator copy_n(thrust::execution_policy<DerivedPolicy> &exec,
-                        InputIterator                            first,
-                        Size                                     n,
-                        OutputIterator                           result)
-{
-  typedef typename thrust::iterator_value<InputIterator>::type value_type;
-  typedef thrust::identity<value_type>                         xfrm_type;
-
-  // XXX why do we need to do this? figure out why, and then see if we can do without
-  typedef typename thrust::detail::unary_transform_functor<DerivedPolicy,xfrm_type>::type functor_type;
-
-  typedef thrust::tuple<InputIterator,OutputIterator> iterator_tuple;
-  typedef thrust::zip_iterator<iterator_tuple>        zip_iter;
-
-  zip_iter zipped = thrust::make_zip_iterator(thrust::make_tuple(first,result));
-
-  return thrust::get<1>(thrust::for_each_n(exec, zipped, n, functor_type(xfrm_type())).get_iterator_tuple());
-} // end copy_n()
-
-
-} // end generic
-} // end detail
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/detail/generic/copy_if.h b/compat/thrust/system/detail/generic/copy_if.h
deleted file mode 100644
index 183f012a03..0000000000
--- a/compat/thrust/system/detail/generic/copy_if.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/tag.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename Predicate>
-  OutputIterator copy_if(thrust::execution_policy<DerivedPolicy> &exec,
-                         InputIterator first,
-                         InputIterator last,
-                         OutputIterator result,
-                         Predicate pred);
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename Predicate>
-   OutputIterator copy_if(thrust::execution_policy<DerivedPolicy> &exec,
-                          InputIterator1 first,
-                          InputIterator1 last,
-                          InputIterator2 stencil,
-                          OutputIterator result,
-                          Predicate pred);
-
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/detail/generic/copy_if.inl>
-
diff --git a/compat/thrust/system/detail/generic/copy_if.inl b/compat/thrust/system/detail/generic/copy_if.inl
deleted file mode 100644
index 145561c9bc..0000000000
--- a/compat/thrust/system/detail/generic/copy_if.inl
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/copy_if.h>
-#include <thrust/detail/copy_if.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/iterator/detail/minimum_system.h>
-#include <thrust/functional.h>
-#include <thrust/distance.h>
-#include <thrust/transform.h>
-#include <thrust/detail/internal_functional.h>
-#include <thrust/detail/temporary_array.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/scan.h>
-#include <thrust/scatter.h>
-#include <limits>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-namespace detail
-{
-
-template<typename IndexType,
-         typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename Predicate>
-OutputIterator copy_if(thrust::execution_policy<DerivedPolicy> &exec,
-                       InputIterator1 first,
-                       InputIterator1 last,
-                       InputIterator2 stencil,
-                       OutputIterator result,
-                       Predicate pred)
-{
-    __THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING(IndexType n = thrust::distance(first, last));
-
-    // compute {0,1} predicates
-    thrust::detail::temporary_array<IndexType, DerivedPolicy> predicates(exec, n);
-    thrust::transform(exec,
-                      stencil,
-                      stencil + n,
-                      predicates.begin(),
-                      thrust::detail::predicate_to_integral<Predicate,IndexType>(pred));
-
-    // scan {0,1} predicates
-    thrust::detail::temporary_array<IndexType, DerivedPolicy> scatter_indices(exec, n);
-    thrust::exclusive_scan(exec,
-                           predicates.begin(),
-                           predicates.end(),
-                           scatter_indices.begin(),
-                           static_cast<IndexType>(0),
-                           thrust::plus<IndexType>());
-
-    // scatter the true elements
-    thrust::scatter_if(exec,
-                       first,
-                       last,
-                       scatter_indices.begin(),
-                       predicates.begin(),
-                       result,
-                       thrust::identity<IndexType>());
-
-    // find the end of the new sequence
-    IndexType output_size = scatter_indices[n - 1] + predicates[n - 1];
-
-    return result + output_size;
-}
-
-} // end namespace detail
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename Predicate>
-  OutputIterator copy_if(thrust::execution_policy<DerivedPolicy> &exec,
-                         InputIterator first,
-                         InputIterator last,
-                         OutputIterator result,
-                         Predicate pred)
-{
-  // XXX it's potentially expensive to send [first,last) twice
-  //     we should probably specialize this case for POD
-  //     since we can safely keep the input in a temporary instead
-  //     of doing two loads
-  return thrust::copy_if(exec, first, last, first, result, pred);
-} // end copy_if()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename Predicate>
-   OutputIterator copy_if(thrust::execution_policy<DerivedPolicy> &exec,
-                          InputIterator1 first,
-                          InputIterator1 last,
-                          InputIterator2 stencil,
-                          OutputIterator result,
-                          Predicate pred)
-{
-  typedef typename thrust::iterator_traits<InputIterator1>::difference_type difference_type;
-  
-  // empty sequence
-  if(first == last)
-    return result;
-  
-  difference_type n = thrust::distance(first, last);
-  
-  // create an unsigned version of n (we know n is positive from the comparison above)
-  // to avoid a warning in the compare below
-  typename thrust::detail::make_unsigned<difference_type>::type unsigned_n(n);
-  
-  // use 32-bit indices when possible (almost always)
-  if(sizeof(difference_type) > sizeof(unsigned int) && unsigned_n > (std::numeric_limits<unsigned int>::max)())
-  {
-    result = detail::copy_if<difference_type>(exec, first, last, stencil, result, pred);
-  } // end if
-  else
-  {
-    result = detail::copy_if<unsigned int>(exec, first, last, stencil, result, pred);
-  } // end else
-
-  return result;
-} // end copy_if()
-
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/generic/count.h b/compat/thrust/system/detail/generic/count.h
deleted file mode 100644
index bc4899e6d0..0000000000
--- a/compat/thrust/system/detail/generic/count.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/tag.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-template <typename DerivedPolicy, typename InputIterator, typename EqualityComparable>
-typename thrust::iterator_traits<InputIterator>::difference_type
-count(thrust::execution_policy<DerivedPolicy> &exec, InputIterator first, InputIterator last, const EqualityComparable& value);
-
-template <typename DerivedPolicy, typename InputIterator, typename Predicate>
-typename thrust::iterator_traits<InputIterator>::difference_type
-count_if(thrust::execution_policy<DerivedPolicy> &exec, InputIterator first, InputIterator last, Predicate pred);
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/detail/generic/count.inl>
-
diff --git a/compat/thrust/system/detail/generic/count.inl b/compat/thrust/system/detail/generic/count.inl
deleted file mode 100644
index e3ab8714b7..0000000000
--- a/compat/thrust/system/detail/generic/count.inl
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/count.h>
-#include <thrust/transform_reduce.h>
-#include <thrust/detail/internal_functional.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-template <typename InputType, typename Predicate, typename CountType>
-struct count_if_transform
-{
-  __host__ __device__ 
-  count_if_transform(Predicate _pred) : pred(_pred){}
-
-  __host__ __device__
-  CountType operator()(const InputType& val)
-  {
-    if(pred(val))
-      return 1;
-    else
-      return 0;
-  } // end operator()
-
-  Predicate pred;
-}; // end count_if_transform
-
-template <typename DerivedPolicy, typename InputIterator, typename EqualityComparable>
-typename thrust::iterator_traits<InputIterator>::difference_type
-count(thrust::execution_policy<DerivedPolicy> &exec, InputIterator first, InputIterator last, const EqualityComparable& value)
-{
-  typedef typename thrust::iterator_traits<InputIterator>::value_type InputType;
-  
-  // XXX use placeholder expression here
-  return thrust::count_if(exec, first, last, thrust::detail::equal_to_value<EqualityComparable>(value));
-} // end count()
-
-template <typename DerivedPolicy, typename InputIterator, typename Predicate>
-typename thrust::iterator_traits<InputIterator>::difference_type
-count_if(thrust::execution_policy<DerivedPolicy> &exec, InputIterator first, InputIterator last, Predicate pred)
-{
-  typedef typename thrust::iterator_traits<InputIterator>::value_type InputType;
-  typedef typename thrust::iterator_traits<InputIterator>::difference_type CountType;
-  
-  thrust::system::detail::generic::count_if_transform<InputType, Predicate, CountType> unary_op(pred);
-  thrust::plus<CountType> binary_op;
-  return thrust::transform_reduce(exec, first, last, unary_op, CountType(0), binary_op);
-} // end count_if()
-
-} // end generic
-} // end detail
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/detail/generic/distance.h b/compat/thrust/system/detail/generic/distance.h
deleted file mode 100644
index 80f051ca53..0000000000
--- a/compat/thrust/system/detail/generic/distance.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/iterator/iterator_traits.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-template<typename InputIterator>
-  inline typename thrust::iterator_traits<InputIterator>::difference_type
-    distance(InputIterator first, InputIterator last);
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/detail/generic/distance.inl>
-
diff --git a/compat/thrust/system/detail/generic/distance.inl b/compat/thrust/system/detail/generic/distance.inl
deleted file mode 100644
index a1fdf1458b..0000000000
--- a/compat/thrust/system/detail/generic/distance.inl
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/distance.h>
-#include <thrust/iterator/iterator_traits.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-namespace detail
-{
-
-template<typename InputIterator>
-  inline typename thrust::iterator_traits<InputIterator>::difference_type
-    distance(InputIterator first, InputIterator last, thrust::incrementable_traversal_tag)
-{
-  typename thrust::iterator_traits<InputIterator>::difference_type result(0);
-
-  while(first != last)
-  {
-    ++first;
-    ++result;
-  } // end while
-
-  return result;
-} // end advance()
-
-template<typename InputIterator>
-  inline typename thrust::iterator_traits<InputIterator>::difference_type
-    distance(InputIterator first, InputIterator last, thrust::random_access_traversal_tag)
-{
-  return last - first;
-} // end distance()
-
-} // end detail
-
-template<typename InputIterator>
-  inline typename thrust::iterator_traits<InputIterator>::difference_type
-    distance(InputIterator first, InputIterator last)
-{
-  // dispatch on iterator traversal
-  return thrust::system::detail::generic::detail::distance(first, last,
-    typename thrust::iterator_traversal<InputIterator>::type());
-} // end advance()
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/generic/equal.h b/compat/thrust/system/detail/generic/equal.h
deleted file mode 100644
index da7d105825..0000000000
--- a/compat/thrust/system/detail/generic/equal.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/tag.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-template<typename DerivedPolicy, typename InputIterator1, typename InputIterator2>
-bool equal(thrust::execution_policy<DerivedPolicy> &exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2);
-
-template<typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename BinaryPredicate>
-bool equal(thrust::execution_policy<DerivedPolicy> &exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, BinaryPredicate binary_pred);
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/detail/generic/equal.inl>
-
diff --git a/compat/thrust/system/detail/generic/equal.inl b/compat/thrust/system/detail/generic/equal.inl
deleted file mode 100644
index 12b8005a2a..0000000000
--- a/compat/thrust/system/detail/generic/equal.inl
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/equal.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/detail/internal_functional.h>
-#include <thrust/mismatch.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-template<typename DerivedPolicy, typename InputIterator1, typename InputIterator2>
-bool equal(thrust::execution_policy<DerivedPolicy> &exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2)
-{
-  typedef typename thrust::iterator_traits<InputIterator1>::value_type InputType1;
-  
-  return thrust::equal(exec, first1, last1, first2, thrust::detail::equal_to<InputType1>());
-}
-
-template<typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename BinaryPredicate>
-bool equal(thrust::execution_policy<DerivedPolicy> &exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, BinaryPredicate binary_pred)
-{
-  return thrust::mismatch(exec, first1, last1, first2, binary_pred).first == last1;
-}
-
-} // end generic
-} // end detail
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/detail/generic/extrema.h b/compat/thrust/system/detail/generic/extrema.h
deleted file mode 100644
index abb4ddc210..0000000000
--- a/compat/thrust/system/detail/generic/extrema.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file extrema.h
- *  \brief Generic device implementations of extrema functions.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/pair.h>
-#include <thrust/system/detail/generic/tag.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-template <typename DerivedPolicy, typename ForwardIterator>
-ForwardIterator max_element(thrust::execution_policy<DerivedPolicy> &exec,
-                            ForwardIterator first,
-                            ForwardIterator last);
-
-template <typename DerivedPolicy, typename ForwardIterator, typename BinaryPredicate>
-ForwardIterator max_element(thrust::execution_policy<DerivedPolicy> &exec,
-                            ForwardIterator first,
-                            ForwardIterator last,
-                            BinaryPredicate comp);
-
-template <typename DerivedPolicy, typename ForwardIterator>
-ForwardIterator min_element(thrust::execution_policy<DerivedPolicy> &exec,
-                            ForwardIterator first,
-                            ForwardIterator last);
-
-template <typename DerivedPolicy, typename ForwardIterator, typename BinaryPredicate>
-ForwardIterator min_element(thrust::execution_policy<DerivedPolicy> &exec,
-                            ForwardIterator first,
-                            ForwardIterator last,
-                            BinaryPredicate comp);
-
-template <typename DerivedPolicy, typename ForwardIterator>
-thrust::pair<ForwardIterator,ForwardIterator> minmax_element(thrust::execution_policy<DerivedPolicy> &exec,
-                                                             ForwardIterator first, 
-                                                             ForwardIterator last);
-
-template <typename DerivedPolicy, typename ForwardIterator, typename BinaryPredicate>
-thrust::pair<ForwardIterator,ForwardIterator> minmax_element(thrust::execution_policy<DerivedPolicy> &exec,
-                                                             ForwardIterator first, 
-                                                             ForwardIterator last,
-                                                             BinaryPredicate comp);
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/detail/generic/extrema.inl>
-
diff --git a/compat/thrust/system/detail/generic/extrema.inl b/compat/thrust/system/detail/generic/extrema.inl
deleted file mode 100644
index b5f92c3935..0000000000
--- a/compat/thrust/system/detail/generic/extrema.inl
+++ /dev/null
@@ -1,244 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file distance.h
- *  \brief Device implementations for distance.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/extrema.h>
-#include <thrust/functional.h>
-#include <thrust/pair.h>
-#include <thrust/reduce.h>
-#include <thrust/transform_reduce.h>
-
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/iterator/counting_iterator.h>
-#include <thrust/iterator/zip_iterator.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-namespace detail
-{
-
-//////////////
-// Functors //
-//////////////
-
-// return the smaller/larger element making sure to prefer the 
-// first occurance of the minimum/maximum element
-template <typename InputType, typename IndexType, typename BinaryPredicate>
-struct min_element_reduction
-{
-  BinaryPredicate comp;
-
-  __host__ __device__ 
-  min_element_reduction(BinaryPredicate comp) : comp(comp){}
-
-  __host__ __device__ 
-  thrust::tuple<InputType, IndexType>
-  operator()(const thrust::tuple<InputType, IndexType>& lhs, 
-             const thrust::tuple<InputType, IndexType>& rhs )
-  {
-    if(comp(thrust::get<0>(lhs), thrust::get<0>(rhs)))
-      return lhs;
-    if(comp(thrust::get<0>(rhs), thrust::get<0>(lhs)))
-      return rhs;
-
-    // values are equivalent, prefer value with smaller index
-    if(thrust::get<1>(lhs) < thrust::get<1>(rhs))
-      return lhs;
-    else
-      return rhs;
-  } // end operator()()
-
-}; // end min_element_reduction
-
-
-template <typename InputType, typename IndexType, typename BinaryPredicate>
-struct max_element_reduction
-{
-  BinaryPredicate comp;
-
-  __host__ __device__ 
-  max_element_reduction(BinaryPredicate comp) : comp(comp){}
-
-  __host__ __device__ 
-  thrust::tuple<InputType, IndexType>
-  operator()(const thrust::tuple<InputType, IndexType>& lhs, 
-             const thrust::tuple<InputType, IndexType>& rhs )
-  {
-    if(comp(thrust::get<0>(lhs), thrust::get<0>(rhs)))
-      return rhs;
-    if(comp(thrust::get<0>(rhs), thrust::get<0>(lhs)))
-      return lhs;
-
-    // values are equivalent, prefer value with smaller index
-    if(thrust::get<1>(lhs) < thrust::get<1>(rhs))
-      return lhs;
-    else
-      return rhs;
-  } // end operator()()
-
-}; // end max_element_reduction
-
-// return the smaller & larger element making sure to prefer the 
-// first occurance of the minimum/maximum element
-template <typename InputType, typename IndexType, typename BinaryPredicate>
-struct minmax_element_reduction
-{
-  BinaryPredicate comp;
-
-  minmax_element_reduction(BinaryPredicate comp) : comp(comp){}
-
-  __host__ __device__ 
-  thrust::tuple< thrust::tuple<InputType,IndexType>, thrust::tuple<InputType,IndexType> >
-  operator()(const thrust::tuple< thrust::tuple<InputType,IndexType>, thrust::tuple<InputType,IndexType> >& lhs, 
-             const thrust::tuple< thrust::tuple<InputType,IndexType>, thrust::tuple<InputType,IndexType> >& rhs )
-  {
-
-    return thrust::make_tuple(min_element_reduction<InputType, IndexType, BinaryPredicate>(comp)(thrust::get<0>(lhs), thrust::get<0>(rhs)),
-                              max_element_reduction<InputType, IndexType, BinaryPredicate>(comp)(thrust::get<1>(lhs), thrust::get<1>(rhs)));
-  } // end operator()()
-}; // end minmax_element_reduction
-
-template <typename InputType, typename IndexType>
-struct duplicate_tuple
-{
-  __host__ __device__ 
-  thrust::tuple< thrust::tuple<InputType,IndexType>, thrust::tuple<InputType,IndexType> >
-  operator()(const thrust::tuple<InputType,IndexType>& t)
-  {
-    return thrust::make_tuple(t, t);
-  }
-}; // end duplicate_tuple
-
-} // end namespace detail
-
-template <typename DerivedPolicy, typename ForwardIterator>
-ForwardIterator min_element(thrust::execution_policy<DerivedPolicy> &exec,
-                            ForwardIterator first,
-                            ForwardIterator last)
-{
-  typedef typename thrust::iterator_value<ForwardIterator>::type value_type;
-
-  return thrust::min_element(exec, first, last, thrust::less<value_type>());
-} // end min_element()
-
-template <typename DerivedPolicy, typename ForwardIterator, typename BinaryPredicate>
-ForwardIterator min_element(thrust::execution_policy<DerivedPolicy> &exec,
-                            ForwardIterator first,
-                            ForwardIterator last,
-                            BinaryPredicate comp)
-{
-  if (first == last)
-    return last;
-
-  typedef typename thrust::iterator_traits<ForwardIterator>::value_type      InputType;
-  typedef typename thrust::iterator_traits<ForwardIterator>::difference_type IndexType;
-
-  thrust::tuple<InputType, IndexType> result =
-    thrust::reduce
-      (exec,
-       thrust::make_zip_iterator(thrust::make_tuple(first, thrust::counting_iterator<IndexType>(0))),
-       thrust::make_zip_iterator(thrust::make_tuple(first, thrust::counting_iterator<IndexType>(0))) + (last - first),
-       thrust::tuple<InputType, IndexType>(*first, 0),
-       detail::min_element_reduction<InputType, IndexType, BinaryPredicate>(comp));
-
-  return first + thrust::get<1>(result);
-} // end min_element()
-
-template <typename DerivedPolicy, typename ForwardIterator>
-ForwardIterator max_element(thrust::execution_policy<DerivedPolicy> &exec,
-                            ForwardIterator first,
-                            ForwardIterator last)
-{
-  typedef typename thrust::iterator_value<ForwardIterator>::type value_type;
-
-  return thrust::max_element(exec, first, last, thrust::less<value_type>());
-} // end max_element()
-
-template <typename DerivedPolicy, typename ForwardIterator, typename BinaryPredicate>
-ForwardIterator max_element(thrust::execution_policy<DerivedPolicy> &exec,
-                            ForwardIterator first,
-                            ForwardIterator last,
-                            BinaryPredicate comp)
-{
-  if (first == last)
-    return last;
-
-  typedef typename thrust::iterator_traits<ForwardIterator>::value_type      InputType;
-  typedef typename thrust::iterator_traits<ForwardIterator>::difference_type IndexType;
-
-  thrust::tuple<InputType, IndexType> result =
-    thrust::reduce
-      (exec,
-       thrust::make_zip_iterator(thrust::make_tuple(first, thrust::counting_iterator<IndexType>(0))),
-       thrust::make_zip_iterator(thrust::make_tuple(first, thrust::counting_iterator<IndexType>(0))) + (last - first),
-       thrust::tuple<InputType, IndexType>(*first, 0),
-       detail::max_element_reduction<InputType, IndexType, BinaryPredicate>(comp));
-
-  return first + thrust::get<1>(result);
-} // end max_element()
-
-template <typename DerivedPolicy, typename ForwardIterator>
-thrust::pair<ForwardIterator,ForwardIterator> minmax_element(thrust::execution_policy<DerivedPolicy> &exec,
-                                                             ForwardIterator first, 
-                                                             ForwardIterator last)
-{
-  typedef typename thrust::iterator_value<ForwardIterator>::type value_type;
-
-  return thrust::minmax_element(exec, first, last, thrust::less<value_type>());
-} // end minmax_element()
-
-template <typename DerivedPolicy, typename ForwardIterator, typename BinaryPredicate>
-thrust::pair<ForwardIterator,ForwardIterator> minmax_element(thrust::execution_policy<DerivedPolicy> &exec,
-                                                             ForwardIterator first, 
-                                                             ForwardIterator last,
-                                                             BinaryPredicate comp)
-{
-  if (first == last)
-    return thrust::make_pair(last, last);
-
-  typedef typename thrust::iterator_traits<ForwardIterator>::value_type      InputType;
-  typedef typename thrust::iterator_traits<ForwardIterator>::difference_type IndexType;
-
-  thrust::tuple< thrust::tuple<InputType,IndexType>, thrust::tuple<InputType,IndexType> > result = 
-    thrust::transform_reduce
-      (exec,
-       thrust::make_zip_iterator(thrust::make_tuple(first, thrust::counting_iterator<IndexType>(0))),
-       thrust::make_zip_iterator(thrust::make_tuple(first, thrust::counting_iterator<IndexType>(0))) + (last - first),
-       detail::duplicate_tuple<InputType, IndexType>(),
-       detail::duplicate_tuple<InputType, IndexType>()(thrust::tuple<InputType, IndexType>(*first, 0)),
-       detail::minmax_element_reduction<InputType, IndexType, BinaryPredicate>(comp));
-
-  return thrust::make_pair(first + thrust::get<1>(thrust::get<0>(result)), first + thrust::get<1>(thrust::get<1>(result)));
-} // end minmax_element()
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/generic/fill.h b/compat/thrust/system/detail/generic/fill.h
deleted file mode 100644
index 9745b1cf57..0000000000
--- a/compat/thrust/system/detail/generic/fill.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file fill.h
- *  \brief Device implementation of fill.
- */
-
-#pragma once
-
-#include <thrust/detail/internal_functional.h>
-#include <thrust/generate.h>
-#include <thrust/system/detail/generic/tag.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-
-template<typename DerivedPolicy, typename OutputIterator, typename Size, typename T>
-  OutputIterator fill_n(thrust::execution_policy<DerivedPolicy> &exec,
-                        OutputIterator first,
-                        Size n,
-                        const T &value)
-{
-  // XXX consider using the placeholder expression _1 = value
-  return thrust::generate_n(exec, first, n, thrust::detail::fill_functor<T>(value));
-}
-
-template<typename DerivedPolicy, typename ForwardIterator, typename T>
-  void fill(thrust::execution_policy<DerivedPolicy> &exec,
-            ForwardIterator first,
-            ForwardIterator last,
-            const T &value)
-{
-  // XXX consider using the placeholder expression _1 = value
-  thrust::generate(exec, first, last, thrust::detail::fill_functor<T>(value));
-}
-
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/generic/find.h b/compat/thrust/system/detail/generic/find.h
deleted file mode 100644
index 08888c5a7c..0000000000
--- a/compat/thrust/system/detail/generic/find.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/tag.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-template<typename DerivedPolicy, typename InputIterator, typename T>
-InputIterator find(thrust::execution_policy<DerivedPolicy> &exec,
-                   InputIterator first,
-                   InputIterator last,
-                   const T& value);
-
-template<typename DerivedPolicy, typename InputIterator, typename Predicate>
-InputIterator find_if(thrust::execution_policy<DerivedPolicy> &exec,
-                      InputIterator first,
-                      InputIterator last,
-                      Predicate pred);
-
-template<typename DerivedPolicy, typename InputIterator, typename Predicate>
-InputIterator find_if_not(thrust::execution_policy<DerivedPolicy> &exec,
-                          InputIterator first,
-                          InputIterator last,
-                          Predicate pred);
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/detail/generic/find.inl>
-
diff --git a/compat/thrust/system/detail/generic/find.inl b/compat/thrust/system/detail/generic/find.inl
deleted file mode 100644
index a3414e1c28..0000000000
--- a/compat/thrust/system/detail/generic/find.inl
+++ /dev/null
@@ -1,141 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/find.h>
-#include <thrust/reduce.h>
-
-#include <thrust/tuple.h>
-#include <thrust/extrema.h>
-#include <thrust/iterator/counting_iterator.h>
-#include <thrust/iterator/transform_iterator.h>
-#include <thrust/iterator/zip_iterator.h>
-#include <thrust/detail/internal_functional.h>
-
-
-// Contributed by Erich Elsen
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-
-template<typename DerivedPolicy, typename InputIterator, typename T>
-InputIterator find(thrust::execution_policy<DerivedPolicy> &exec,
-                   InputIterator first,
-                   InputIterator last,
-                   const T& value)
-{
-  // XXX consider a placeholder expression here
-  return thrust::find_if(exec, first, last, thrust::detail::equal_to_value<T>(value));
-} // end find()
-
-
-template<typename TupleType>
-struct find_if_functor
-{
-    __host__ __device__
-    TupleType operator()(const TupleType& lhs, const TupleType& rhs) const
-    {
-        // select the smallest index among true results
-        if (thrust::get<0>(lhs) && thrust::get<0>(rhs))
-            return TupleType(true, (thrust::min)(thrust::get<1>(lhs), thrust::get<1>(rhs)));
-        else if (thrust::get<0>(lhs))
-            return lhs;
-        else
-            return rhs;
-    }
-};
-    
-
-template<typename DerivedPolicy, typename InputIterator, typename Predicate>
-InputIterator find_if(thrust::execution_policy<DerivedPolicy> &exec,
-                      InputIterator first,
-                      InputIterator last,
-                      Predicate pred)
-{
-    typedef typename thrust::iterator_traits<InputIterator>::difference_type difference_type;
-    typedef typename thrust::tuple<bool,difference_type> result_type;
-   
-    // empty sequence
-    if (first == last)
-        return last;
-
-    const difference_type n = thrust::distance(first, last);
-
-    // this implementation breaks up the sequence into separate intervals
-    // in an attempt to early-out as soon as a value is found
-
-    // TODO incorporate sizeof(InputType) into interval_threshold and round to multiple of 32
-    const difference_type interval_threshold = 1 << 20;
-    const difference_type interval_size = (std::min)(interval_threshold, n);
-
-    // force transform_iterator output to bool
-    typedef thrust::transform_iterator<Predicate, InputIterator, bool> XfrmIterator;
-    typedef thrust::tuple<XfrmIterator, thrust::counting_iterator<difference_type> > IteratorTuple;
-    typedef thrust::zip_iterator<IteratorTuple> ZipIterator;
-
-    IteratorTuple iter_tuple = thrust::make_tuple(XfrmIterator(first, pred),
-                                                  thrust::counting_iterator<difference_type>(0));
-
-    ZipIterator begin = thrust::make_zip_iterator(iter_tuple);
-    ZipIterator end   = begin + n;
-
-    for(ZipIterator interval_begin = begin; interval_begin < end; interval_begin += interval_size)
-    {
-        ZipIterator interval_end = interval_begin + interval_size;
-        if(end < interval_end)
-        {
-          interval_end = end;
-        } // end if
-
-        result_type result = thrust::reduce(exec,
-                                            interval_begin, interval_end,
-                                            result_type(false,interval_end - begin),
-                                            find_if_functor<result_type>());
-
-        // see if we found something
-        if (thrust::get<0>(result))
-        {
-            return first + thrust::get<1>(result);
-        }
-    }
-
-    //nothing was found if we reach here...
-    return first + n;
-}
-
-
-template<typename DerivedPolicy, typename InputIterator, typename Predicate>
-InputIterator find_if_not(thrust::execution_policy<DerivedPolicy> &exec,
-                          InputIterator first,
-                          InputIterator last,
-                          Predicate pred)
-{
-    return thrust::find_if(exec, first, last, thrust::detail::not1(pred));
-} // end find()
-
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/generic/for_each.h b/compat/thrust/system/detail/generic/for_each.h
deleted file mode 100644
index 61abe20b6f..0000000000
--- a/compat/thrust/system/detail/generic/for_each.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-/*! \file for_each.h
- *  \brief Generic implementation of for_each & for_each_n.
- *         It is an error to call these functions; they have no implementation.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/execution_policy.h>
-#include <thrust/system/detail/generic/tag.h>
-#include <thrust/detail/static_assert.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename UnaryFunction>
-InputIterator for_each(thrust::execution_policy<DerivedPolicy> &exec,
-                       InputIterator first,
-                       InputIterator last,
-                       UnaryFunction f)
-{
-  // unimplemented
-  THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation<InputIterator, false>::value) );
-  return first;
-} // end for_each()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename Size,
-         typename UnaryFunction>
-InputIterator for_each_n(thrust::execution_policy<DerivedPolicy> &exec,
-                         InputIterator first,
-                         Size n,
-                         UnaryFunction f)
-{
-  // unimplemented
-  THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation<InputIterator, false>::value) );
-  return first;
-} // end for_each_n()
-
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/generic/gather.h b/compat/thrust/system/detail/generic/gather.h
deleted file mode 100644
index cfb6f85ca5..0000000000
--- a/compat/thrust/system/detail/generic/gather.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/tag.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-
-template<typename ExecutionPolicy,
-         typename InputIterator,
-         typename RandomAccessIterator,
-         typename OutputIterator>
-  OutputIterator gather(thrust::execution_policy<ExecutionPolicy> &exec,
-                        InputIterator                              map_first,
-                        InputIterator                              map_last,
-                        RandomAccessIterator                       input_first,
-                        OutputIterator                             result);
-
-
-template<typename ExecutionPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename RandomAccessIterator,
-         typename OutputIterator>
-  OutputIterator gather_if(thrust::execution_policy<ExecutionPolicy> &exec,
-                           InputIterator1                             map_first,
-                           InputIterator1                             map_last,
-                           InputIterator2                             stencil,
-                           RandomAccessIterator                       input_first,
-                           OutputIterator                             result);
-
-
-template<typename ExecutionPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename RandomAccessIterator,
-         typename OutputIterator,
-         typename Predicate>
-  OutputIterator gather_if(thrust::execution_policy<ExecutionPolicy> &exec,
-                           InputIterator1                             map_first,
-                           InputIterator1                             map_last,
-                           InputIterator2                             stencil,
-                           RandomAccessIterator                       input_first,
-                           OutputIterator                             result,
-                           Predicate                                  pred);
-
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/detail/generic/gather.inl>
-
diff --git a/compat/thrust/system/detail/generic/gather.inl b/compat/thrust/system/detail/generic/gather.inl
deleted file mode 100644
index ab2cdd8116..0000000000
--- a/compat/thrust/system/detail/generic/gather.inl
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/gather.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/functional.h>
-#include <thrust/transform.h>
-#include <thrust/iterator/permutation_iterator.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename RandomAccessIterator,
-         typename OutputIterator>
-  OutputIterator gather(thrust::execution_policy<DerivedPolicy> &exec,
-                        InputIterator                            map_first,
-                        InputIterator                            map_last,
-                        RandomAccessIterator                     input_first,
-                        OutputIterator                           result)
-{
-  return thrust::transform(exec,
-                           thrust::make_permutation_iterator(input_first, map_first),
-                           thrust::make_permutation_iterator(input_first, map_last),
-                           result,
-                           thrust::identity<typename thrust::iterator_value<RandomAccessIterator>::type>());
-} // end gather()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename RandomAccessIterator,
-         typename OutputIterator>
-  OutputIterator gather_if(thrust::execution_policy<DerivedPolicy> &exec,
-                           InputIterator1                           map_first,
-                           InputIterator1                           map_last,
-                           InputIterator2                           stencil,
-                           RandomAccessIterator                     input_first,
-                           OutputIterator                           result)
-{
-  typedef typename thrust::iterator_value<InputIterator2>::type StencilType;
-  return thrust::gather_if(exec,
-                           map_first,
-                           map_last,
-                           stencil,
-                           input_first,
-                           result,
-                           thrust::identity<StencilType>());
-} // end gather_if()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename RandomAccessIterator,
-         typename OutputIterator,
-         typename Predicate>
-  OutputIterator gather_if(thrust::execution_policy<DerivedPolicy> &exec,
-                           InputIterator1                           map_first,
-                           InputIterator1                           map_last,
-                           InputIterator2                           stencil,
-                           RandomAccessIterator                     input_first,
-                           OutputIterator                           result,
-                           Predicate                                pred)
-{
-  typedef typename thrust::iterator_value<RandomAccessIterator>::type InputType;
-  return thrust::transform_if(exec,
-                              thrust::make_permutation_iterator(input_first, map_first),
-                              thrust::make_permutation_iterator(input_first, map_last),
-                              stencil,
-                              result,
-                              thrust::identity<InputType>(),
-                              pred);
-} // end gather_if()
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/generic/generate.h b/compat/thrust/system/detail/generic/generate.h
deleted file mode 100644
index e7a8e00726..0000000000
--- a/compat/thrust/system/detail/generic/generate.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/tag.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-template<typename ExecutionPolicy,
-         typename ForwardIterator,
-         typename Generator>
-  void generate(thrust::execution_policy<ExecutionPolicy> &exec,
-                ForwardIterator first,
-                ForwardIterator last,
-                Generator gen);
-
-template<typename ExecutionPolicy,
-         typename OutputIterator,
-         typename Size,
-         typename Generator>
-  OutputIterator generate_n(thrust::execution_policy<ExecutionPolicy> &exec,
-                            OutputIterator first,
-                            Size n,
-                            Generator gen);
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/detail/generic/generate.inl>
-
diff --git a/compat/thrust/system/detail/generic/generate.inl b/compat/thrust/system/detail/generic/generate.inl
deleted file mode 100644
index 4da5763f9c..0000000000
--- a/compat/thrust/system/detail/generic/generate.inl
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/generate.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/detail/internal_functional.h>
-#include <thrust/for_each.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-template<typename ExecutionPolicy,
-         typename ForwardIterator,
-         typename Generator>
-  void generate(thrust::execution_policy<ExecutionPolicy> &exec,
-                ForwardIterator first,
-                ForwardIterator last,
-                Generator gen)
-{
-  thrust::for_each(exec, first, last, typename thrust::detail::generate_functor<ExecutionPolicy,Generator>::type(gen));
-} // end generate()
-
-template<typename ExecutionPolicy,
-         typename OutputIterator,
-         typename Size,
-         typename Generator>
-  OutputIterator generate_n(thrust::execution_policy<ExecutionPolicy> &exec,
-                            OutputIterator first,
-                            Size n,
-                            Generator gen)
-{
-  return thrust::for_each_n(exec, first, n, typename thrust::detail::generate_functor<ExecutionPolicy,Generator>::type(gen));
-} // end generate()
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/generic/inner_product.h b/compat/thrust/system/detail/generic/inner_product.h
deleted file mode 100644
index 9ac5c69636..0000000000
--- a/compat/thrust/system/detail/generic/inner_product.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/tag.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-template<typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename OutputType>
-  OutputType inner_product(thrust::execution_policy<DerivedPolicy> &exec,
-                           InputIterator1 first1,
-                           InputIterator1 last1,
-                           InputIterator2 first2,
-                           OutputType init);
-
-template<typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename OutputType, typename BinaryFunction1, typename BinaryFunction2>
-OutputType inner_product(thrust::execution_policy<DerivedPolicy> &exec,
-                         InputIterator1 first1,
-                         InputIterator1 last1,
-                         InputIterator2 first2,
-                         OutputType init, 
-                         BinaryFunction1 binary_op1,
-                         BinaryFunction2 binary_op2);
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/detail/generic/inner_product.inl>
-
diff --git a/compat/thrust/system/detail/generic/inner_product.inl b/compat/thrust/system/detail/generic/inner_product.inl
deleted file mode 100644
index b6a339ea15..0000000000
--- a/compat/thrust/system/detail/generic/inner_product.inl
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/inner_product.h>
-#include <thrust/functional.h>
-#include <thrust/detail/internal_functional.h>
-#include <thrust/transform_reduce.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-
-template<typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename OutputType>
-OutputType inner_product(thrust::execution_policy<DerivedPolicy> &exec,
-                         InputIterator1 first1,
-                         InputIterator1 last1,
-                         InputIterator2 first2,
-                         OutputType init)
-{
-  thrust::plus<OutputType>       binary_op1;
-  thrust::multiplies<OutputType> binary_op2;
-  return thrust::inner_product(exec, first1, last1, first2, init, binary_op1, binary_op2);
-} // end inner_product()
-
-
-template<typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename OutputType, typename BinaryFunction1, typename BinaryFunction2>
-OutputType inner_product(thrust::execution_policy<DerivedPolicy> &exec,
-                         InputIterator1 first1,
-                         InputIterator1 last1,
-                         InputIterator2 first2,
-                         OutputType init, 
-                         BinaryFunction1 binary_op1,
-                         BinaryFunction2 binary_op2)
-{
-  typedef thrust::zip_iterator<thrust::tuple<InputIterator1,InputIterator2> > ZipIter;
-
-  ZipIter first = thrust::make_zip_iterator(thrust::make_tuple(first1,first2));
-
-  // only the first iterator in the tuple is relevant for the purposes of last
-  ZipIter last  = thrust::make_zip_iterator(thrust::make_tuple(last1, first2));
-
-  return thrust::transform_reduce(exec, first, last, thrust::detail::zipped_binary_op<OutputType,BinaryFunction2>(binary_op2), init, binary_op1);
-} // end inner_product()
-
-
-} // end generic
-} // end detail
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/detail/generic/logical.h b/compat/thrust/system/detail/generic/logical.h
deleted file mode 100644
index e0d01e30a5..0000000000
--- a/compat/thrust/system/detail/generic/logical.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/tag.h>
-#include <thrust/detail/internal_functional.h>
-#include <thrust/find.h>
-#include <thrust/logical.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-template <typename ExecutionPolicy, typename InputIterator, typename Predicate>
-bool all_of(thrust::execution_policy<ExecutionPolicy> &exec, InputIterator first, InputIterator last, Predicate pred)
-{
-  return thrust::find_if(exec, first, last, thrust::detail::not1(pred)) == last;
-}
-
-template <typename ExecutionPolicy, typename InputIterator, typename Predicate>
-bool any_of(thrust::execution_policy<ExecutionPolicy> &exec, InputIterator first, InputIterator last, Predicate pred)
-{
-  return thrust::find_if(exec, first, last, pred) != last;
-}
-
-template <typename ExecutionPolicy, typename InputIterator, typename Predicate>
-bool none_of(thrust::execution_policy<ExecutionPolicy> &exec, InputIterator first, InputIterator last, Predicate pred)
-{
-  return !thrust::any_of(exec, first, last, pred);
-}
-
-} // end generic
-} // end detail
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/detail/generic/memory.h b/compat/thrust/system/detail/generic/memory.h
deleted file mode 100644
index c0fe623ac4..0000000000
--- a/compat/thrust/system/detail/generic/memory.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file generic/memory.h
- *  \brief Generic implementation of memory functions.
- *         Calling some of these is an error. They have no implementation.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/execution_policy.h>
-#include <thrust/system/detail/generic/tag.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/detail/pointer.h>
-#include <thrust/pair.h>
-#include <thrust/system/detail/generic/type_traits.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-template<typename DerivedPolicy, typename Size> void malloc(thrust::execution_policy<DerivedPolicy> &, Size);
-
-template<typename T, typename DerivedPolicy>
-thrust::pointer<T,DerivedPolicy> malloc(thrust::execution_policy<DerivedPolicy> &s, std::size_t n);
-
-template<typename DerivedPolicy, typename Pointer> void free(thrust::execution_policy<DerivedPolicy> &, Pointer);
-
-template<typename Pointer1, typename Pointer2>
-__host__ __device__
-void assign_value(tag, Pointer1, Pointer2);
-
-template<typename DerivedPolicy, typename Pointer>
-__host__ __device__
-void get_value(thrust::execution_policy<DerivedPolicy> &, Pointer);
-
-template<typename Pointer1, typename Pointer2>
-__host__ __device__
-void iter_swap(tag, Pointer1, Pointer2);
-
-} // end generic
-} // end detail
-} // end system
-} // end thrust
-
-#include <thrust/system/detail/generic/memory.inl>
-
diff --git a/compat/thrust/system/detail/generic/memory.inl b/compat/thrust/system/detail/generic/memory.inl
deleted file mode 100644
index f89a763a62..0000000000
--- a/compat/thrust/system/detail/generic/memory.inl
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/type_traits/pointer_traits.h>
-#include <thrust/system/detail/generic/memory.h>
-#include <thrust/system/detail/generic/select_system.h>
-#include <thrust/system/detail/adl/malloc_and_free.h>
-#include <thrust/detail/static_assert.h>
-#include <thrust/detail/malloc_and_free.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-
-template<typename DerivedPolicy, typename Size>
-  void malloc(thrust::execution_policy<DerivedPolicy> &, Size)
-{
-  // unimplemented
-  THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation<Size, false>::value) );
-}
-
-
-template<typename T, typename DerivedPolicy>
-  thrust::pointer<T,DerivedPolicy>
-    malloc(thrust::execution_policy<DerivedPolicy> &exec, std::size_t n)
-{
-  thrust::pointer<void,DerivedPolicy> void_ptr = thrust::malloc(exec, sizeof(T) * n);
-
-  return pointer<T,DerivedPolicy>(static_cast<T*>(void_ptr.get()));
-} // end malloc()
-
-
-template<typename DerivedPolicy, typename Pointer>
-  void free(thrust::execution_policy<DerivedPolicy> &, Pointer)
-{
-  // unimplemented
-  THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation<Pointer, false>::value) );
-}
-
-
-template<typename DerivedPolicy, typename Pointer1, typename Pointer2>
-__host__ __device__
-void assign_value(thrust::execution_policy<DerivedPolicy> &, Pointer1, Pointer2)
-{
-  // unimplemented
-  THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation<Pointer1, false>::value) );
-}
-
-
-template<typename DerivedPolicy, typename Pointer>
-__host__ __device__
-void get_value(thrust::execution_policy<DerivedPolicy> &, Pointer)
-{
-  // unimplemented
-  THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation<Pointer, false>::value) );
-}
-
-
-template<typename Pointer1, typename Pointer2>
-__host__ __device__
-void iter_swap(tag, Pointer1, Pointer2)
-{
-  // unimplemented
-  THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation<Pointer1, false>::value) );
-}
-
-
-} // end generic
-} // end detail
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/detail/generic/merge.h b/compat/thrust/system/detail/generic/merge.h
deleted file mode 100644
index 5f0b99640d..0000000000
--- a/compat/thrust/system/detail/generic/merge.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/tag.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-
-// XXX calling this function is an error; there is no implementation
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename StrictWeakOrdering>
-  OutputIterator merge(thrust::execution_policy<DerivedPolicy> &exec,
-                       InputIterator1 first1,
-                       InputIterator1 last1,
-                       InputIterator2 first2,
-                       InputIterator2 last2,
-                       OutputIterator result,
-                       StrictWeakOrdering comp);
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator>
-  OutputIterator merge(thrust::execution_policy<DerivedPolicy> &exec,
-                       InputIterator1 first1,
-                       InputIterator1 last1,
-                       InputIterator2 first2,
-                       InputIterator2 last2,
-                       OutputIterator result);
-
-
-template<typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename InputIterator3, typename InputIterator4, typename OutputIterator1, typename OutputIterator2, typename Compare>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    merge_by_key(thrust::execution_policy<DerivedPolicy> &exec,
-                 InputIterator1 keys_first1, InputIterator1 keys_last1,
-                 InputIterator2 keys_first2, InputIterator2 keys_last2,
-                 InputIterator3 values_first1, InputIterator4 values_first2,
-                 OutputIterator1 keys_result,
-                 OutputIterator2 values_result,
-                 Compare comp);
-
-
-template<typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename InputIterator3, typename InputIterator4, typename OutputIterator1, typename OutputIterator2>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    merge_by_key(thrust::execution_policy<DerivedPolicy> &exec,
-                 InputIterator1 keys_first1, InputIterator1 keys_last1,
-                 InputIterator2 keys_first2, InputIterator2 keys_last2,
-                 InputIterator3 values_first1, InputIterator4 values_first2,
-                 OutputIterator1 keys_result,
-                 OutputIterator2 values_result);
-
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/detail/generic/merge.inl>
-
diff --git a/compat/thrust/system/detail/generic/merge.inl b/compat/thrust/system/detail/generic/merge.inl
deleted file mode 100644
index b913611168..0000000000
--- a/compat/thrust/system/detail/generic/merge.inl
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/static_assert.h>
-#include <thrust/system/detail/generic/merge.h>
-#include <thrust/merge.h>
-#include <thrust/functional.h>
-#include <thrust/iterator/zip_iterator.h>
-#include <thrust/detail/internal_functional.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename StrictWeakOrdering>
-  OutputIterator merge(thrust::execution_policy<DerivedPolicy> &exec,
-                       InputIterator1 first1,
-                       InputIterator1 last1,
-                       InputIterator2 first2,
-                       InputIterator2 last2,
-                       OutputIterator result,
-                       StrictWeakOrdering comp)
-{
-  // unimplemented
-  THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation<InputIterator1, false>::value) );
-  return result;
-} // end merge()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator>
-  OutputIterator merge(thrust::execution_policy<DerivedPolicy> &exec,
-                       InputIterator1 first1,
-                       InputIterator1 last1,
-                       InputIterator2 first2,
-                       InputIterator2 last2,
-                       OutputIterator result)
-{
-  typedef typename thrust::iterator_value<InputIterator1>::type value_type;
-  return thrust::merge(exec,first1,last1,first2,last2,result,thrust::less<value_type>());
-} // end merge()
-
-
-template<typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename InputIterator3, typename InputIterator4, typename OutputIterator1, typename OutputIterator2, typename Compare>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    merge_by_key(thrust::execution_policy<DerivedPolicy> &exec,
-                 InputIterator1 keys_first1, InputIterator1 keys_last1,
-                 InputIterator2 keys_first2, InputIterator2 keys_last2,
-                 InputIterator3 values_first1, InputIterator4 values_first2,
-                 OutputIterator1 keys_result,
-                 OutputIterator2 values_result,
-                 Compare comp)
-{
-  typedef thrust::tuple<InputIterator1, InputIterator3>   iterator_tuple1;
-  typedef thrust::tuple<InputIterator2, InputIterator4>   iterator_tuple2;
-  typedef thrust::tuple<OutputIterator1, OutputIterator2> iterator_tuple3;
-
-  typedef thrust::zip_iterator<iterator_tuple1> zip_iterator1;
-  typedef thrust::zip_iterator<iterator_tuple2> zip_iterator2;
-  typedef thrust::zip_iterator<iterator_tuple3> zip_iterator3;
-
-  zip_iterator1 zipped_first1 = thrust::make_zip_iterator(thrust::make_tuple(keys_first1, values_first1));
-  zip_iterator1 zipped_last1  = thrust::make_zip_iterator(thrust::make_tuple(keys_last1, values_first1));
-
-  zip_iterator2 zipped_first2 = thrust::make_zip_iterator(thrust::make_tuple(keys_first2, values_first2));
-  zip_iterator2 zipped_last2  = thrust::make_zip_iterator(thrust::make_tuple(keys_last2, values_first2));
-
-  zip_iterator3 zipped_result = thrust::make_zip_iterator(thrust::make_tuple(keys_result, values_result));
-
-  thrust::detail::compare_first<Compare> comp_first(comp);
-
-  iterator_tuple3 result = thrust::merge(exec, zipped_first1, zipped_last1, zipped_first2, zipped_last2, zipped_result, comp_first).get_iterator_tuple();
-
-  return thrust::make_pair(thrust::get<0>(result), thrust::get<1>(result));
-} // end merge_by_key()
-
-
-template<typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename InputIterator3, typename InputIterator4, typename OutputIterator1, typename OutputIterator2>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    merge_by_key(thrust::execution_policy<DerivedPolicy> &exec,
-                 InputIterator1 keys_first1, InputIterator1 keys_last1,
-                 InputIterator2 keys_first2, InputIterator2 keys_last2,
-                 InputIterator3 values_first1, InputIterator4 values_first2,
-                 OutputIterator1 keys_result,
-                 OutputIterator2 values_result)
-{
-  typedef typename thrust::iterator_value<InputIterator1>::type value_type;
-  return thrust::merge_by_key(exec, keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result, thrust::less<value_type>());
-} // end merge_by_key()
-
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/generic/mismatch.h b/compat/thrust/system/detail/generic/mismatch.h
deleted file mode 100644
index dc581ffbee..0000000000
--- a/compat/thrust/system/detail/generic/mismatch.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/tag.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-
-template<typename DerivedPolicy, typename InputIterator1, typename InputIterator2>
-  thrust::pair<InputIterator1, InputIterator2>
-    mismatch(thrust::execution_policy<DerivedPolicy> &exec,
-             InputIterator1 first1,
-             InputIterator1 last1,
-             InputIterator2 first2);
-
-
-template<typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename BinaryPredicate>
-  thrust::pair<InputIterator1, InputIterator2>
-    mismatch(thrust::execution_policy<DerivedPolicy> &exec,
-             InputIterator1 first1,
-             InputIterator1 last1,
-             InputIterator2 first2,
-             BinaryPredicate pred);
-
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/detail/generic/mismatch.inl>
-
diff --git a/compat/thrust/system/detail/generic/mismatch.inl b/compat/thrust/system/detail/generic/mismatch.inl
deleted file mode 100644
index 923c27f71b..0000000000
--- a/compat/thrust/system/detail/generic/mismatch.inl
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/mismatch.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/detail/internal_functional.h>
-#include <thrust/find.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-template<typename DerivedPolicy, typename InputIterator1, typename InputIterator2>
-  thrust::pair<InputIterator1, InputIterator2>
-    mismatch(thrust::execution_policy<DerivedPolicy> &exec,
-             InputIterator1 first1,
-             InputIterator1 last1,
-             InputIterator2 first2)
-{
-  typedef typename thrust::iterator_value<InputIterator1>::type InputType1;
-  
-  // XXX use a placeholder expression here
-  return thrust::mismatch(exec, first1, last1, first2, thrust::detail::equal_to<InputType1>());
-} // end mismatch()
-
-template<typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename BinaryPredicate>
-  thrust::pair<InputIterator1, InputIterator2>
-    mismatch(thrust::execution_policy<DerivedPolicy> &exec,
-             InputIterator1 first1,
-             InputIterator1 last1,
-             InputIterator2 first2,
-             BinaryPredicate pred)
-{
-  // Contributed by Erich Elsen
-  typedef thrust::tuple<InputIterator1,InputIterator2> IteratorTuple;
-  typedef thrust::zip_iterator<IteratorTuple>          ZipIterator;
-  
-  ZipIterator zipped_first = thrust::make_zip_iterator(thrust::make_tuple(first1,first2));
-  ZipIterator zipped_last  = thrust::make_zip_iterator(thrust::make_tuple(last1, first2));
-  
-  ZipIterator result = thrust::find_if_not(exec, zipped_first, zipped_last, thrust::detail::tuple_binary_predicate<BinaryPredicate>(pred));
-  
-  return thrust::make_pair(thrust::get<0>(result.get_iterator_tuple()),
-                           thrust::get<1>(result.get_iterator_tuple()));
-} // end mismatch()
-
-} // end generic
-} // end detail
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/detail/generic/partition.h b/compat/thrust/system/detail/generic/partition.h
deleted file mode 100644
index 63daa1d1c1..0000000000
--- a/compat/thrust/system/detail/generic/partition.h
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file partition.h
- *  \brief Generic implementations of partition functions.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/tag.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-template<typename ExecutionPolicy,
-         typename ForwardIterator,
-         typename Predicate>
-  ForwardIterator stable_partition(thrust::execution_policy<ExecutionPolicy> &exec,
-                                   ForwardIterator first,
-                                   ForwardIterator last,
-                                   Predicate pred);
-
-template<typename ExecutionPolicy,
-         typename ForwardIterator,
-         typename InputIterator,
-         typename Predicate>
-  ForwardIterator stable_partition(thrust::execution_policy<ExecutionPolicy> &exec,
-                                   ForwardIterator first,
-                                   ForwardIterator last,
-                                   InputIterator stencil,
-                                   Predicate pred);
-
-template<typename ExecutionPolicy,
-         typename InputIterator,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename Predicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    stable_partition_copy(thrust::execution_policy<ExecutionPolicy> &exec,
-                          InputIterator first,
-                          InputIterator last,
-                          OutputIterator1 out_true,
-                          OutputIterator2 out_false,
-                          Predicate pred);
-
-template<typename ExecutionPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename Predicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    stable_partition_copy(thrust::execution_policy<ExecutionPolicy> &exec,
-                          InputIterator1 first,
-                          InputIterator1 last,
-                          InputIterator2 stencil,
-                          OutputIterator1 out_true,
-                          OutputIterator2 out_false,
-                          Predicate pred);
-
-template<typename ExecutionPolicy,
-         typename ForwardIterator,
-         typename Predicate>
-  ForwardIterator partition(thrust::execution_policy<ExecutionPolicy> &exec,
-                            ForwardIterator first,
-                            ForwardIterator last,
-                            Predicate pred);
-
-template<typename ExecutionPolicy,
-         typename ForwardIterator,
-         typename InputIterator,
-         typename Predicate>
-  ForwardIterator partition(thrust::execution_policy<ExecutionPolicy> &exec,
-                            ForwardIterator first,
-                            ForwardIterator last,
-                            InputIterator stencil,
-                            Predicate pred);
-
-template<typename ExecutionPolicy,
-         typename InputIterator,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename Predicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    partition_copy(thrust::execution_policy<ExecutionPolicy> &exec,
-                   InputIterator first,
-                   InputIterator last,
-                   OutputIterator1 out_true,
-                   OutputIterator2 out_false,
-                   Predicate pred);
-
-template<typename ExecutionPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename Predicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    partition_copy(thrust::execution_policy<ExecutionPolicy> &exec,
-                   InputIterator1 first,
-                   InputIterator1 last,
-                   InputIterator2 stencil,
-                   OutputIterator1 out_true,
-                   OutputIterator2 out_false,
-                   Predicate pred);
-
-template<typename ExecutionPolicy,
-         typename ForwardIterator,
-         typename Predicate>
-  ForwardIterator partition_point(thrust::execution_policy<ExecutionPolicy> &exec,
-                                  ForwardIterator first,
-                                  ForwardIterator last,
-                                  Predicate pred);
-
-template<typename ExecutionPolicy,
-         typename InputIterator,
-         typename Predicate>
-  bool is_partitioned(thrust::execution_policy<ExecutionPolicy> &exec,
-                      InputIterator first,
-                      InputIterator last,
-                      Predicate pred);
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/detail/generic/partition.inl>
-
diff --git a/compat/thrust/system/detail/generic/partition.inl b/compat/thrust/system/detail/generic/partition.inl
deleted file mode 100644
index 3298afc6f5..0000000000
--- a/compat/thrust/system/detail/generic/partition.inl
+++ /dev/null
@@ -1,238 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/partition.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/pair.h>
-
-#include <thrust/remove.h>
-#include <thrust/count.h>
-#include <thrust/advance.h>
-#include <thrust/partition.h>
-#include <thrust/sort.h>
-#include <thrust/iterator/transform_iterator.h>
-
-#include <thrust/detail/internal_functional.h>
-#include <thrust/detail/temporary_array.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename Predicate>
-  ForwardIterator stable_partition(thrust::execution_policy<DerivedPolicy> &exec,
-                                   ForwardIterator first,
-                                   ForwardIterator last,
-                                   Predicate pred)
-{
-  typedef typename thrust::iterator_traits<ForwardIterator>::value_type InputType;
-
-  // copy input to temp buffer
-  thrust::detail::temporary_array<InputType,DerivedPolicy> temp(exec, first, last);
-
-  // count the size of the true partition
-  typename thrust::iterator_difference<ForwardIterator>::type num_true = thrust::count_if(exec, first,last,pred);
-
-  // point to the beginning of the false partition
-  ForwardIterator out_false = first;
-  thrust::advance(out_false, num_true);
-
-  return thrust::stable_partition_copy(exec, temp.begin(), temp.end(), first, out_false, pred).first;
-} // end stable_partition()
-
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename InputIterator,
-         typename Predicate>
-  ForwardIterator stable_partition(thrust::execution_policy<DerivedPolicy> &exec,
-                                   ForwardIterator first,
-                                   ForwardIterator last,
-                                   InputIterator stencil,
-                                   Predicate pred)
-{
-  typedef typename thrust::iterator_traits<ForwardIterator>::value_type InputType;
-
-  // copy input to temp buffer
-  thrust::detail::temporary_array<InputType,DerivedPolicy> temp(exec, first, last);
-
-  // count the size of the true partition
-  InputIterator stencil_last = stencil;
-  thrust::advance(stencil_last, temp.size());
-  typename thrust::iterator_difference<InputIterator>::type num_true = thrust::count_if(exec, stencil, stencil_last, pred);
-
-  // point to the beginning of the false partition
-  ForwardIterator out_false = first;
-  thrust::advance(out_false, num_true);
-
-  return thrust::stable_partition_copy(exec, temp.begin(), temp.end(), stencil, first, out_false, pred).first;
-} // end stable_partition()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename Predicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    stable_partition_copy(thrust::execution_policy<DerivedPolicy> &exec,
-                          InputIterator first,
-                          InputIterator last,
-                          OutputIterator1 out_true,
-                          OutputIterator2 out_false,
-                          Predicate pred)
-{
-  thrust::detail::unary_negate<Predicate> not_pred(pred);
-
-  // remove_copy_if the true partition to out_true
-  OutputIterator1 end_of_true_partition = thrust::remove_copy_if(exec, first, last, out_true, not_pred);
-
-  // remove_copy_if the false partition to out_false
-  OutputIterator2 end_of_false_partition = thrust::remove_copy_if(exec, first, last, out_false, pred);
-
-  return thrust::make_pair(end_of_true_partition, end_of_false_partition);
-} // end stable_partition_copy()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename Predicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    stable_partition_copy(thrust::execution_policy<DerivedPolicy> &exec,
-                          InputIterator1 first,
-                          InputIterator1 last,
-                          InputIterator2 stencil,
-                          OutputIterator1 out_true,
-                          OutputIterator2 out_false,
-                          Predicate pred)
-{
-  thrust::detail::unary_negate<Predicate> not_pred(pred);
-
-  // remove_copy_if the true partition to out_true
-  OutputIterator1 end_of_true_partition = thrust::remove_copy_if(exec, first, last, stencil, out_true, not_pred);
-
-  // remove_copy_if the false partition to out_false
-  OutputIterator2 end_of_false_partition = thrust::remove_copy_if(exec, first, last, stencil, out_false, pred);
-
-  return thrust::make_pair(end_of_true_partition, end_of_false_partition);
-} // end stable_partition_copy()
-
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename Predicate>
-  ForwardIterator partition(thrust::execution_policy<DerivedPolicy> &exec,
-                            ForwardIterator first,
-                            ForwardIterator last,
-                            Predicate pred)
-{
-  return thrust::stable_partition(exec, first, last, pred);
-} // end partition()
-
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename InputIterator,
-         typename Predicate>
-  ForwardIterator partition(thrust::execution_policy<DerivedPolicy> &exec,
-                            ForwardIterator first,
-                            ForwardIterator last,
-                            InputIterator stencil,
-                            Predicate pred)
-{
-  return thrust::stable_partition(exec, first, last, stencil, pred);
-} // end partition()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename Predicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    partition_copy(thrust::execution_policy<DerivedPolicy> &exec,
-                   InputIterator first,
-                   InputIterator last,
-                   OutputIterator1 out_true,
-                   OutputIterator2 out_false,
-                   Predicate pred)
-{
-  return thrust::stable_partition_copy(exec,first,last,out_true,out_false,pred);
-} // end partition_copy()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename Predicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    partition_copy(thrust::execution_policy<DerivedPolicy> &exec,
-                   InputIterator1 first,
-                   InputIterator1 last,
-                   InputIterator2 stencil,
-                   OutputIterator1 out_true,
-                   OutputIterator2 out_false,
-                   Predicate pred)
-{
-  return thrust::stable_partition_copy(exec,first,last,stencil,out_true,out_false,pred);
-} // end partition_copy()
-
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename Predicate>
-  ForwardIterator partition_point(thrust::execution_policy<DerivedPolicy> &exec,
-                                  ForwardIterator first,
-                                  ForwardIterator last,
-                                  Predicate pred)
-{
-  return thrust::find_if_not(exec, first, last, pred);
-} // end partition_point()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename Predicate>
-  bool is_partitioned(thrust::execution_policy<DerivedPolicy> &exec,
-                      InputIterator first,
-                      InputIterator last,
-                      Predicate pred)
-{
-  return thrust::is_sorted(exec,
-                           thrust::make_transform_iterator(first, thrust::detail::not1(pred)),
-                           thrust::make_transform_iterator(last,  thrust::detail::not1(pred)));
-} // end is_partitioned()
-
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/generic/reduce.h b/compat/thrust/system/detail/generic/reduce.h
deleted file mode 100644
index 2811df164f..0000000000
--- a/compat/thrust/system/detail/generic/reduce.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/tag.h>
-#include <thrust/iterator/iterator_traits.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-template<typename DerivedPolicy, typename InputIterator>
-  typename thrust::iterator_traits<InputIterator>::value_type
-    reduce(thrust::execution_policy<DerivedPolicy> &exec, InputIterator first, InputIterator last);
-
-template<typename DerivedPolicy, typename InputIterator, typename T>
-  T reduce(thrust::execution_policy<DerivedPolicy> &exec, InputIterator first, InputIterator last, T init);
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename T,
-         typename BinaryFunction>
-  T reduce(thrust::execution_policy<DerivedPolicy> &exec, InputIterator first, InputIterator last, T init, BinaryFunction binary_op);
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/detail/generic/reduce.inl>
-
diff --git a/compat/thrust/system/detail/generic/reduce.inl b/compat/thrust/system/detail/generic/reduce.inl
deleted file mode 100644
index 8f52385163..0000000000
--- a/compat/thrust/system/detail/generic/reduce.inl
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/reduce.h>
-#include <thrust/system/detail/generic/reduce.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/functional.h>
-#include <thrust/detail/static_assert.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-
-template<typename ExecutionPolicy, typename InputIterator>
-  typename thrust::iterator_traits<InputIterator>::value_type
-    reduce(thrust::execution_policy<ExecutionPolicy> &exec, InputIterator first, InputIterator last)
-{
-  typedef typename thrust::iterator_value<InputIterator>::type InputType;
-
-  // use InputType(0) as init by default
-  return thrust::reduce(exec, first, last, InputType(0));
-} // end reduce()
-
-
-template<typename ExecutionPolicy, typename InputIterator, typename T>
-  T reduce(thrust::execution_policy<ExecutionPolicy> &exec, InputIterator first, InputIterator last, T init)
-{
-  // use plus<T> by default
-  return thrust::reduce(exec, first, last, init, thrust::plus<T>());
-} // end reduce()
-
-
-template<typename ExecutionPolicy,
-         typename RandomAccessIterator,
-         typename OutputType,
-         typename BinaryFunction>
-  OutputType reduce(thrust::execution_policy<ExecutionPolicy> &exec,
-                    RandomAccessIterator first,
-                    RandomAccessIterator last,
-                    OutputType init,
-                    BinaryFunction binary_op)
-{
-  // unimplemented
-  THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation<RandomAccessIterator, false>::value) );
-  return OutputType();
-} // end reduce()
-
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/generic/reduce_by_key.h b/compat/thrust/system/detail/generic/reduce_by_key.h
deleted file mode 100644
index c6064ab53e..0000000000
--- a/compat/thrust/system/detail/generic/reduce_by_key.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/tag.h>
-#include <thrust/iterator/iterator_traits.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator1,
-         typename OutputIterator2>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    reduce_by_key(thrust::execution_policy<DerivedPolicy> &exec,
-                  InputIterator1 keys_first, 
-                  InputIterator1 keys_last,
-                  InputIterator2 values_first,
-                  OutputIterator1 keys_output,
-                  OutputIterator2 values_output);
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename BinaryPredicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    reduce_by_key(thrust::execution_policy<DerivedPolicy> &exec,
-                  InputIterator1 keys_first, 
-                  InputIterator1 keys_last,
-                  InputIterator2 values_first,
-                  OutputIterator1 keys_output,
-                  OutputIterator2 values_output,
-                  BinaryPredicate binary_pred);
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename BinaryPredicate,
-         typename BinaryFunction>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    reduce_by_key(thrust::execution_policy<DerivedPolicy> &exec,
-                  InputIterator1 keys_first, 
-                  InputIterator1 keys_last,
-                  InputIterator2 values_first,
-                  OutputIterator1 keys_output,
-                  OutputIterator2 values_output,
-                  BinaryPredicate binary_pred,
-                  BinaryFunction binary_op);
-
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/detail/generic/reduce_by_key.inl>
-
diff --git a/compat/thrust/system/detail/generic/reduce_by_key.inl b/compat/thrust/system/detail/generic/reduce_by_key.inl
deleted file mode 100644
index 2ca21a5aab..0000000000
--- a/compat/thrust/system/detail/generic/reduce_by_key.inl
+++ /dev/null
@@ -1,212 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file reduce_by_key.inl
- *  \brief Inline file for reduce_by_key.h.
- */
-
-#pragma once
-
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/iterator/detail/minimum_system.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/detail/type_traits/iterator/is_output_iterator.h>
-#include <thrust/detail/type_traits/function_traits.h>
-#include <thrust/transform.h>
-#include <thrust/scatter.h>
-#include <thrust/iterator/zip_iterator.h>
-#include <limits>
-
-#include <thrust/detail/internal_functional.h>
-#include <thrust/scan.h>
-#include <thrust/detail/temporary_array.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-namespace detail
-{
-
-template <typename ValueType, typename TailFlagType, typename AssociativeOperator>
-struct reduce_by_key_functor
-{
-    AssociativeOperator binary_op;
-
-    typedef typename thrust::tuple<ValueType, TailFlagType> result_type;
-
-    __host__ __device__
-    reduce_by_key_functor(AssociativeOperator _binary_op) : binary_op(_binary_op) {}
-
-    __host__ __device__
-    result_type operator()(result_type a, result_type b)
-    {
-        return result_type(thrust::get<1>(b) ? thrust::get<0>(b) : binary_op(thrust::get<0>(a), thrust::get<0>(b)),
-                           thrust::get<1>(a) | thrust::get<1>(b));
-    }
-};
-
-} // end namespace detail
-
-
-template<typename ExecutionPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename BinaryPredicate,
-         typename BinaryFunction>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    reduce_by_key(thrust::execution_policy<ExecutionPolicy> &exec,
-                  InputIterator1 keys_first, 
-                  InputIterator1 keys_last,
-                  InputIterator2 values_first,
-                  OutputIterator1 keys_output,
-                  OutputIterator2 values_output,
-                  BinaryPredicate binary_pred,
-                  BinaryFunction binary_op)
-{
-    typedef typename thrust::iterator_traits<InputIterator1>::difference_type difference_type;
-    typedef typename thrust::iterator_traits<InputIterator1>::value_type  KeyType;
-
-    typedef unsigned int FlagType;  // TODO use difference_type
-
-    // the pseudocode for deducing the type of the temporary used below:
-    // 
-    // if BinaryFunction is AdaptableBinaryFunction
-    //   TemporaryType = AdaptableBinaryFunction::result_type
-    // else if OutputIterator2 is a "pure" output iterator
-    //   TemporaryType = InputIterator2::value_type
-    // else
-    //   TemporaryType = OutputIterator2::value_type
-    //
-    // XXX upon c++0x, TemporaryType needs to be:
-    // result_of<BinaryFunction>::type
-
-    typedef typename thrust::detail::eval_if<
-      thrust::detail::has_result_type<BinaryFunction>::value,
-      thrust::detail::result_type<BinaryFunction>,
-      thrust::detail::eval_if<
-        thrust::detail::is_output_iterator<OutputIterator2>::value,
-        thrust::iterator_value<InputIterator2>,
-        thrust::iterator_value<OutputIterator2>
-      >
-    >::type ValueType;
-
-    if (keys_first == keys_last)
-        return thrust::make_pair(keys_output, values_output);
-
-    // input size
-    difference_type n = keys_last - keys_first;
-
-    InputIterator2 values_last = values_first + n;
-    
-    // compute head flags
-    thrust::detail::temporary_array<FlagType,ExecutionPolicy> head_flags(exec, n);
-    thrust::transform(exec, keys_first, keys_last - 1, keys_first + 1, head_flags.begin() + 1, thrust::detail::not2(binary_pred));
-    head_flags[0] = 1;
-
-    // compute tail flags
-    thrust::detail::temporary_array<FlagType,ExecutionPolicy> tail_flags(exec, n); //COPY INSTEAD OF TRANSFORM
-    thrust::transform(exec, keys_first, keys_last - 1, keys_first + 1, tail_flags.begin(), thrust::detail::not2(binary_pred));
-    tail_flags[n-1] = 1;
-
-    // scan the values by flag
-    thrust::detail::temporary_array<ValueType,ExecutionPolicy> scanned_values(exec, n);
-    thrust::detail::temporary_array<FlagType,ExecutionPolicy>  scanned_tail_flags(exec, n);
-    
-    thrust::inclusive_scan
-        (exec,
-         thrust::make_zip_iterator(thrust::make_tuple(values_first,           head_flags.begin())),
-         thrust::make_zip_iterator(thrust::make_tuple(values_last,            head_flags.end())),
-         thrust::make_zip_iterator(thrust::make_tuple(scanned_values.begin(), scanned_tail_flags.begin())),
-         detail::reduce_by_key_functor<ValueType, FlagType, BinaryFunction>(binary_op));
-
-    thrust::exclusive_scan(exec, tail_flags.begin(), tail_flags.end(), scanned_tail_flags.begin(), FlagType(0), thrust::plus<FlagType>());
-
-    // number of unique keys
-    FlagType N = scanned_tail_flags[n - 1] + 1;
-    
-    // scatter the keys and accumulated values    
-    thrust::scatter_if(exec, keys_first,            keys_last,             scanned_tail_flags.begin(), head_flags.begin(), keys_output);
-    thrust::scatter_if(exec, scanned_values.begin(), scanned_values.end(), scanned_tail_flags.begin(), tail_flags.begin(), values_output);
-
-    return thrust::make_pair(keys_output + N, values_output + N); 
-} // end reduce_by_key()
-
-
-template<typename ExecutionPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator1,
-         typename OutputIterator2>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    reduce_by_key(thrust::execution_policy<ExecutionPolicy> &exec,
-                  InputIterator1 keys_first, 
-                  InputIterator1 keys_last,
-                  InputIterator2 values_first,
-                  OutputIterator1 keys_output,
-                  OutputIterator2 values_output)
-{
-  typedef typename thrust::iterator_value<InputIterator1>::type KeyType;
-
-  // use equal_to<KeyType> as default BinaryPredicate
-  return thrust::reduce_by_key(exec, keys_first, keys_last, values_first, keys_output, values_output, thrust::equal_to<KeyType>());
-} // end reduce_by_key()
-
-
-template<typename ExecutionPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename BinaryPredicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    reduce_by_key(thrust::execution_policy<ExecutionPolicy> &exec,
-                  InputIterator1 keys_first, 
-                  InputIterator1 keys_last,
-                  InputIterator2 values_first,
-                  OutputIterator1 keys_output,
-                  OutputIterator2 values_output,
-                  BinaryPredicate binary_pred)
-{
-  typedef typename thrust::detail::eval_if<
-    thrust::detail::is_output_iterator<OutputIterator2>::value,
-    thrust::iterator_value<InputIterator2>,
-    thrust::iterator_value<OutputIterator2>
-  >::type T;
-
-  // use plus<T> as default BinaryFunction
-  return thrust::reduce_by_key(exec,
-                               keys_first, keys_last, 
-                               values_first,
-                               keys_output,
-                               values_output,
-                               binary_pred,
-                               thrust::plus<T>());
-} // end reduce_by_key()
-
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/generic/remove.h b/compat/thrust/system/detail/generic/remove.h
deleted file mode 100644
index e23673574e..0000000000
--- a/compat/thrust/system/detail/generic/remove.h
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file remove.h
- *  \brief Generic implementations of remove functions.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/tag.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename T>
-  ForwardIterator remove(thrust::execution_policy<DerivedPolicy> &exec,
-                         ForwardIterator first,
-                         ForwardIterator last,
-                         const T &value);
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename T>
-  OutputIterator remove_copy(thrust::execution_policy<DerivedPolicy> &exec,
-                             InputIterator first,
-                             InputIterator last,
-                             OutputIterator result,
-                             const T &value);
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename Predicate>
-  ForwardIterator remove_if(thrust::execution_policy<DerivedPolicy> &exec,
-                            ForwardIterator first,
-                            ForwardIterator last,
-                            Predicate pred);
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename InputIterator,
-         typename Predicate>
-  ForwardIterator remove_if(thrust::execution_policy<DerivedPolicy> &exec,
-                            ForwardIterator first,
-                            ForwardIterator last,
-                            InputIterator stencil,
-                            Predicate pred);
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename Predicate>
-  OutputIterator remove_copy_if(thrust::execution_policy<DerivedPolicy> &exec,
-                                InputIterator first,
-                                InputIterator last,
-                                OutputIterator result,
-                                Predicate pred);
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename Predicate>
-  OutputIterator remove_copy_if(thrust::execution_policy<DerivedPolicy> &exec,
-                                InputIterator1 first,
-                                InputIterator1 last,
-                                InputIterator2 stencil,
-                                OutputIterator result,
-                                Predicate pred);
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/detail/generic/remove.inl>
-
diff --git a/compat/thrust/system/detail/generic/remove.inl b/compat/thrust/system/detail/generic/remove.inl
deleted file mode 100644
index 8a533e029a..0000000000
--- a/compat/thrust/system/detail/generic/remove.inl
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file remove.inl
- *  \brief Inline file for remove.h
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/remove.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/detail/copy_if.h>
-#include <thrust/detail/internal_functional.h>
-#include <thrust/detail/temporary_array.h>
-#include <thrust/remove.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename T>
-  ForwardIterator remove(thrust::execution_policy<DerivedPolicy> &exec,
-                         ForwardIterator first,
-                         ForwardIterator last,
-                         const T &value)
-{
-  thrust::detail::equal_to_value<T> pred(value);
-
-  // XXX consider using a placeholder here
-  return thrust::remove_if(exec, first, last, pred);
-} // end remove()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename T>
-  OutputIterator remove_copy(thrust::execution_policy<DerivedPolicy> &exec,
-                             InputIterator first,
-                             InputIterator last,
-                             OutputIterator result,
-                             const T &value)
-{
-  thrust::detail::equal_to_value<T> pred(value);
-
-  // XXX consider using a placeholder here
-  return thrust::remove_copy_if(exec, first, last, result, pred);
-} // end remove_copy()
-
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename Predicate>
-  ForwardIterator remove_if(thrust::execution_policy<DerivedPolicy> &exec,
-                            ForwardIterator first,
-                            ForwardIterator last,
-                            Predicate pred)
-{
-  typedef typename thrust::iterator_traits<ForwardIterator>::value_type InputType;
-
-  // create temporary storage for an intermediate result
-  thrust::detail::temporary_array<InputType,DerivedPolicy> temp(exec, first, last);
-
-  // remove into temp
-  return thrust::remove_copy_if(exec, temp.begin(), temp.end(), temp.begin(), first, pred);
-} // end remove_if()
-
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename InputIterator,
-         typename Predicate>
-  ForwardIterator remove_if(thrust::execution_policy<DerivedPolicy> &exec,
-                            ForwardIterator first,
-                            ForwardIterator last,
-                            InputIterator stencil,
-                            Predicate pred)
-{
-  typedef typename thrust::iterator_traits<ForwardIterator>::value_type InputType;
-
-  // create temporary storage for an intermediate result
-  thrust::detail::temporary_array<InputType,DerivedPolicy> temp(exec, first, last);
-
-  // remove into temp
-  return thrust::remove_copy_if(exec, temp.begin(), temp.end(), stencil, first, pred);
-} // end remove_if() 
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename Predicate>
-  OutputIterator remove_copy_if(thrust::execution_policy<DerivedPolicy> &exec,
-                                InputIterator first,
-                                InputIterator last,
-                                OutputIterator result,
-                                Predicate pred)
-{
-  return thrust::remove_copy_if(exec, first, last, first, result, pred);
-} // end remove_copy_if()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename Predicate>
-  OutputIterator remove_copy_if(thrust::execution_policy<DerivedPolicy> &exec,
-                                InputIterator1 first,
-                                InputIterator1 last,
-                                InputIterator2 stencil,
-                                OutputIterator result,
-                                Predicate pred)
-{
-  return thrust::copy_if(exec, first, last, stencil, result, thrust::detail::not1(pred));
-} // end remove_copy_if()
-
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/generic/replace.h b/compat/thrust/system/detail/generic/replace.h
deleted file mode 100644
index deb2e55bc8..0000000000
--- a/compat/thrust/system/detail/generic/replace.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/tag.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-
-template<typename DerivedPolicy, typename InputIterator, typename OutputIterator, typename Predicate, typename T>
-  OutputIterator replace_copy_if(thrust::execution_policy<DerivedPolicy> &exec,
-                                 InputIterator first,
-                                 InputIterator last,
-                                 OutputIterator result,
-                                 Predicate pred,
-                                 const T &new_value);
-
-
-template<typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename OutputIterator, typename Predicate, typename T>
-  OutputIterator replace_copy_if(thrust::execution_policy<DerivedPolicy> &exec,
-                                 InputIterator1 first,
-                                 InputIterator1 last,
-                                 InputIterator2 stencil,
-                                 OutputIterator result,
-                                 Predicate pred,
-                                 const T &new_value);
-
-
-template<typename DerivedPolicy, typename InputIterator, typename OutputIterator, typename T>
-  OutputIterator replace_copy(thrust::execution_policy<DerivedPolicy> &exec,
-                              InputIterator first,
-                              InputIterator last,
-                              OutputIterator result,
-                              const T &old_value,
-                              const T &new_value);
-
-
-template<typename DerivedPolicy, typename ForwardIterator, typename Predicate, typename T>
-  void replace_if(thrust::execution_policy<DerivedPolicy> &exec,
-                  ForwardIterator first,
-                  ForwardIterator last,
-                  Predicate pred,
-                  const T &new_value);
-
-
-template<typename DerivedPolicy, typename ForwardIterator, typename InputIterator, typename Predicate, typename T>
-  void replace_if(thrust::execution_policy<DerivedPolicy> &exec,
-                  ForwardIterator first,
-                  ForwardIterator last,
-                  InputIterator stencil,
-                  Predicate pred,
-                  const T &new_value);
-
-
-template<typename DerivedPolicy, typename ForwardIterator, typename T>
-  void replace(thrust::execution_policy<DerivedPolicy> &exec,
-               ForwardIterator first,
-               ForwardIterator last,
-               const T &old_value,
-               const T &new_value);
-
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/detail/generic/replace.inl>
-
diff --git a/compat/thrust/system/detail/generic/replace.inl b/compat/thrust/system/detail/generic/replace.inl
deleted file mode 100644
index 52e7118ecc..0000000000
--- a/compat/thrust/system/detail/generic/replace.inl
+++ /dev/null
@@ -1,168 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/replace.h>
-#include <thrust/transform.h>
-#include <thrust/replace.h>
-#include <thrust/detail/internal_functional.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-namespace detail
-{
-
-// this functor receives x, and returns a new_value if predicate(x) is true; otherwise,
-// it returns x
-template<typename Predicate, typename NewType, typename OutputType>
-  struct new_value_if
-{
-  new_value_if(Predicate p, NewType nv):pred(p),new_value(nv){}
-
-  template<typename InputType>
-  __host__ __device__
-  OutputType operator()(const InputType x) const
-  {
-    return pred(x) ? new_value : x;
-  } // end operator()()
-
-  // this version of operator()() works like the previous but
-  // feeds its second argument to pred
-  template<typename InputType, typename PredicateArgumentType>
-  __host__ __device__
-  OutputType operator()(const InputType x, const PredicateArgumentType y)
-  {
-    return pred(y) ? new_value : x;
-  } // end operator()()
-  
-  Predicate pred;
-  NewType new_value;
-}; // end new_value_if
-
-// this unary functor ignores its argument and returns a constant
-template<typename T>
-  struct constant_unary
-{
-  constant_unary(T _c):c(_c){}
-
-  template<typename U>
-  __host__ __device__
-  T operator()(U &x)
-  {
-    return c;
-  } // end operator()()
-
-  T c;
-}; // end constant_unary
-
-} // end detail
-
-template<typename DerivedPolicy, typename InputIterator, typename OutputIterator, typename Predicate, typename T>
-  OutputIterator replace_copy_if(thrust::execution_policy<DerivedPolicy> &exec,
-                                 InputIterator first,
-                                 InputIterator last,
-                                 OutputIterator result,
-                                 Predicate pred,
-                                 const T &new_value)
-{
-  typedef typename thrust::iterator_traits<InputIterator>::value_type InputType;
-  typedef typename thrust::iterator_traits<OutputIterator>::value_type OutputType;
-
-  detail::new_value_if<Predicate,T,OutputType> op(pred,new_value);
-  return thrust::transform(exec, first, last, result, op);
-} // end replace_copy_if()
-
-template<typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename OutputIterator, typename Predicate, typename T>
-  OutputIterator replace_copy_if(thrust::execution_policy<DerivedPolicy> &exec,
-                                 InputIterator1 first,
-                                 InputIterator1 last,
-                                 InputIterator2 stencil,
-                                 OutputIterator result,
-                                 Predicate pred,
-                                 const T &new_value)
-{
-  typedef typename thrust::iterator_traits<OutputIterator>::value_type OutputType;
-
-  detail::new_value_if<Predicate,T,OutputType> op(pred,new_value);
-  return thrust::transform(exec, first, last, stencil, result, op);
-} // end replace_copy_if()
-
-
-template<typename DerivedPolicy, typename InputIterator, typename OutputIterator, typename T>
-  OutputIterator replace_copy(thrust::execution_policy<DerivedPolicy> &exec,
-                              InputIterator first,
-                              InputIterator last,
-                              OutputIterator result,
-                              const T &old_value,
-                              const T &new_value)
-{
-  thrust::detail::equal_to_value<T> pred(old_value);
-  return thrust::replace_copy_if(exec, first, last, result, pred, new_value);
-} // end replace_copy()
-
-template<typename DerivedPolicy, typename ForwardIterator, typename Predicate, typename T>
-  void replace_if(thrust::execution_policy<DerivedPolicy> &exec,
-                  ForwardIterator first,
-                  ForwardIterator last,
-                  Predicate pred,
-                  const T &new_value)
-{
-  detail::constant_unary<T> f(new_value);
-
-  // XXX replace this with generate_if:
-  // constant_nullary<T> f(new_value);
-  // generate_if(first, last, first, f, pred);
-  thrust::transform_if(exec, first, last, first, first, f, pred);
-} // end replace_if()
-
-template<typename DerivedPolicy, typename ForwardIterator, typename InputIterator, typename Predicate, typename T>
-  void replace_if(thrust::execution_policy<DerivedPolicy> &exec,
-                  ForwardIterator first,
-                  ForwardIterator last,
-                  InputIterator stencil,
-                  Predicate pred,
-                  const T &new_value)
-{
-  detail::constant_unary<T> f(new_value);
-
-  // XXX replace this with generate_if:
-  // constant_nullary<T> f(new_value);
-  // generate_if(stencil, stencil + n, first, f, pred);
-  thrust::transform_if(exec, first, last, stencil, first, f, pred);
-} // end replace_if()
-
-template<typename DerivedPolicy, typename ForwardIterator, typename T>
-  void replace(thrust::execution_policy<DerivedPolicy> &exec,
-               ForwardIterator first,
-               ForwardIterator last,
-               const T &old_value,
-               const T &new_value)
-{
-  thrust::detail::equal_to_value<T> pred(old_value);
-  return thrust::replace_if(exec, first, last, pred, new_value);
-} // end replace()
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/generic/reverse.h b/compat/thrust/system/detail/generic/reverse.h
deleted file mode 100644
index 327bf221b1..0000000000
--- a/compat/thrust/system/detail/generic/reverse.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/tag.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-template<typename DerivedPolicy, typename BidirectionalIterator>
-  void reverse(thrust::execution_policy<DerivedPolicy> &exec,
-               BidirectionalIterator first,
-               BidirectionalIterator last);
-
-template<typename DerivedPolicy,
-         typename BidirectionalIterator,
-         typename OutputIterator>
-  OutputIterator reverse_copy(thrust::execution_policy<DerivedPolicy> &exec,
-                              BidirectionalIterator first,
-                              BidirectionalIterator last,
-                              OutputIterator result);
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/detail/generic/reverse.inl>
-
diff --git a/compat/thrust/system/detail/generic/reverse.inl b/compat/thrust/system/detail/generic/reverse.inl
deleted file mode 100644
index 27c1bbf2fa..0000000000
--- a/compat/thrust/system/detail/generic/reverse.inl
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/reverse.h>
-#include <thrust/advance.h>
-#include <thrust/distance.h>
-#include <thrust/detail/copy.h>
-#include <thrust/swap.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/iterator/reverse_iterator.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-template<typename ExecutionPolicy, typename BidirectionalIterator>
-  void reverse(thrust::execution_policy<ExecutionPolicy> &exec,
-               BidirectionalIterator first,
-               BidirectionalIterator last)
-{
-  typedef typename thrust::iterator_difference<BidirectionalIterator>::type difference_type;
-
-  // find the midpoint of [first,last)
-  difference_type N = thrust::distance(first, last);
-  BidirectionalIterator mid(first);
-  thrust::advance(mid, N / 2);
-
-  // swap elements of [first,mid) with [last - 1, mid)
-  thrust::swap_ranges(exec, first, mid, thrust::make_reverse_iterator(last));
-} // end reverse()
-
-template<typename ExecutionPolicy,
-         typename BidirectionalIterator,
-         typename OutputIterator>
-  OutputIterator reverse_copy(thrust::execution_policy<ExecutionPolicy> &exec,
-                              BidirectionalIterator first,
-                              BidirectionalIterator last,
-                              OutputIterator result)
-{
-  return thrust::copy(exec,
-                      thrust::make_reverse_iterator(last),
-                      thrust::make_reverse_iterator(first),
-                      result);
-} // end reverse_copy()
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
-
diff --git a/compat/thrust/system/detail/generic/scalar/binary_search.h b/compat/thrust/system/detail/generic/scalar/binary_search.h
deleted file mode 100644
index 6ed9e8d9ee..0000000000
--- a/compat/thrust/system/detail/generic/scalar/binary_search.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/pair.h>
-
-namespace thrust
-{
-
-namespace system
-{
-
-namespace detail
-{
-
-namespace generic
-{
-
-namespace scalar
-{
-
-template<typename RandomAccessIterator, typename Size, typename T, typename BinaryPredicate>
-__host__ __device__
-RandomAccessIterator lower_bound_n(RandomAccessIterator first,
-                                   Size n,
-                                   const T &val,
-                                   BinaryPredicate comp);
-
-template<typename RandomAccessIterator, typename T, typename BinaryPredicate>
-__host__ __device__
-RandomAccessIterator lower_bound(RandomAccessIterator first, RandomAccessIterator last,
-                                 const T &val,
-                                 BinaryPredicate comp);
-
-template<typename RandomAccessIterator, typename Size, typename T, typename BinaryPredicate>
-__host__ __device__
-RandomAccessIterator upper_bound_n(RandomAccessIterator first,
-                                   Size n,
-                                   const T &val,
-                                   BinaryPredicate comp);
-
-template<typename RandomAccessIterator, typename T, typename BinaryPredicate>
-__host__ __device__
-RandomAccessIterator upper_bound(RandomAccessIterator first, RandomAccessIterator last,
-                                 const T &val,
-                                 BinaryPredicate comp);
-
-template<typename RandomAccessIterator, typename T, typename BinaryPredicate>
-__host__ __device__
-  pair<RandomAccessIterator,RandomAccessIterator>
-    equal_range(RandomAccessIterator first, RandomAccessIterator last,
-                const T &val,
-                BinaryPredicate comp);
-
-template<typename RandomAccessIterator, typename T, typename Compare>
-__host__ __device__
-bool binary_search(RandomAccessIterator first, RandomAccessIterator last, const T &value, Compare comp);
-
-} // end scalar
-
-} // end generic
-
-} // end detail
-
-} // end system
-
-} // end thrust
-
-#include <thrust/system/detail/generic/scalar/binary_search.inl>
-
diff --git a/compat/thrust/system/detail/generic/scalar/binary_search.inl b/compat/thrust/system/detail/generic/scalar/binary_search.inl
deleted file mode 100644
index 5a9d379612..0000000000
--- a/compat/thrust/system/detail/generic/scalar/binary_search.inl
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/pair.h>
-#include <thrust/detail/function.h>
-#include <thrust/iterator/iterator_traits.h>
-
-namespace thrust
-{
-
-namespace system
-{
-
-namespace detail
-{
-
-namespace generic
-{
-
-namespace scalar
-{
-
-template<typename RandomAccessIterator, typename Size, typename T, typename BinaryPredicate>
-__host__ __device__
-RandomAccessIterator lower_bound_n(RandomAccessIterator first,
-                                   Size n,
-                                   const T &val,
-                                   BinaryPredicate comp)
-{
-  // wrap comp
-  thrust::detail::host_device_function<
-    BinaryPredicate,
-    bool
-  > wrapped_comp(comp);
-
-  Size start = 0, i;
-  while(start < n)
-  {
-    i = (start + n) / 2;
-    if(wrapped_comp(first[i], val))
-    {
-      start = i + 1;
-    }
-    else
-    {
-      n = i;
-    }
-  } // end while
-  
-  return first + start;
-}
-
-// XXX generalize these upon implementation of scalar::distance & scalar::advance
-
-template<typename RandomAccessIterator, typename T, typename BinaryPredicate>
-__host__ __device__
-RandomAccessIterator lower_bound(RandomAccessIterator first, RandomAccessIterator last,
-                                 const T &val,
-                                 BinaryPredicate comp)
-{
-  typename thrust::iterator_difference<RandomAccessIterator>::type n = last - first;
-  return lower_bound_n(first, n, val, comp);
-}
-
-template<typename RandomAccessIterator, typename Size, typename T, typename BinaryPredicate>
-__host__ __device__
-RandomAccessIterator upper_bound_n(RandomAccessIterator first,
-                                   Size n,
-                                   const T &val,
-                                   BinaryPredicate comp)
-{
-  // wrap comp
-  thrust::detail::host_device_function<
-    BinaryPredicate,
-    bool
-  > wrapped_comp(comp);
-
-  Size start = 0, i;
-  while(start < n)
-  {
-    i = (start + n) / 2;
-    if(wrapped_comp(val, first[i]))
-    {
-      n = i;
-    }
-    else
-    {
-      start = i + 1;
-    }
-  } // end while
-  
-  return first + start;
-}
-
-template<typename RandomAccessIterator, typename T, typename BinaryPredicate>
-__host__ __device__
-RandomAccessIterator upper_bound(RandomAccessIterator first, RandomAccessIterator last,
-                                 const T &val,
-                                 BinaryPredicate comp)
-{
-  typename thrust::iterator_difference<RandomAccessIterator>::type n = last - first;
-  return upper_bound_n(first, n, val, comp);
-}
-
-template<typename RandomAccessIterator, typename T, typename BinaryPredicate>
-__host__ __device__
-  pair<RandomAccessIterator,RandomAccessIterator>
-    equal_range(RandomAccessIterator first, RandomAccessIterator last,
-                const T &val,
-                BinaryPredicate comp)
-{
-  RandomAccessIterator lb = thrust::system::detail::generic::scalar::lower_bound(first, last, val, comp);
-  return thrust::make_pair(lb, thrust::system::detail::generic::scalar::upper_bound(lb, last, val, comp));
-}
-
-
-template<typename RandomAccessIterator, typename T, typename Compare>
-__host__ __device__
-bool binary_search(RandomAccessIterator first, RandomAccessIterator last, const T &value, Compare comp)
-{
-  RandomAccessIterator iter = thrust::system::detail::generic::scalar::lower_bound(first, last, value, comp);
-
-  // wrap comp
-  thrust::detail::host_device_function<
-    Compare,
-    bool
-  > wrapped_comp(comp);
-
-  return iter != last && !wrapped_comp(value,*iter);
-}
-
-} // end scalar
-
-} // end generic
-
-} // end detail
-
-} // end system
-
-} // end thrust
-
-#include <thrust/system/detail/generic/scalar/binary_search.inl>
-
diff --git a/compat/thrust/system/detail/generic/scan.h b/compat/thrust/system/detail/generic/scan.h
deleted file mode 100644
index 205f87ff9f..0000000000
--- a/compat/thrust/system/detail/generic/scan.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/tag.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-
-template<typename ExecutionPolicy,
-         typename InputIterator,
-         typename OutputIterator>
-  OutputIterator inclusive_scan(thrust::execution_policy<ExecutionPolicy> &exec,
-                                InputIterator first,
-                                InputIterator last,
-                                OutputIterator result);
-
-
-// XXX it is an error to call this function; it has no implementation 
-template<typename ExecutionPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename BinaryFunction>
-  OutputIterator inclusive_scan(thrust::execution_policy<ExecutionPolicy> &exec,
-                                InputIterator first,
-                                InputIterator last,
-                                OutputIterator result,
-                                BinaryFunction binary_op);
-
-
-template<typename ExecutionPolicy,
-         typename InputIterator,
-         typename OutputIterator>
-  OutputIterator exclusive_scan(thrust::execution_policy<ExecutionPolicy> &exec,
-                                InputIterator first,
-                                InputIterator last,
-                                OutputIterator result);
-
-
-template<typename ExecutionPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename T>
-  OutputIterator exclusive_scan(thrust::execution_policy<ExecutionPolicy> &exec,
-                                InputIterator first,
-                                InputIterator last,
-                                OutputIterator result,
-                                T init);
-
-
-// XXX it is an error to call this function; it has no implementation 
-template<typename ExecutionPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename T,
-         typename BinaryFunction>
-  OutputIterator exclusive_scan(thrust::execution_policy<ExecutionPolicy> &exec,
-                                InputIterator first,
-                                InputIterator last,
-                                OutputIterator result,
-                                T init,
-                                BinaryFunction binary_op);
-
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/detail/generic/scan.inl>
-
diff --git a/compat/thrust/system/detail/generic/scan.inl b/compat/thrust/system/detail/generic/scan.inl
deleted file mode 100644
index 33e0803c70..0000000000
--- a/compat/thrust/system/detail/generic/scan.inl
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/static_assert.h>
-#include <thrust/system/detail/generic/scan.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/scan.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/detail/type_traits/iterator/is_output_iterator.h>
-#include <thrust/functional.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-
-template<typename ExecutionPolicy,
-         typename InputIterator,
-         typename OutputIterator>
-  OutputIterator inclusive_scan(thrust::execution_policy<ExecutionPolicy> &exec,
-                                InputIterator first,
-                                InputIterator last,
-                                OutputIterator result)
-{
-  // the pseudocode for deducing the type of the temporary used below:
-  // 
-  // if OutputIterator is a "pure" output iterator
-  //   TemporaryType = InputIterator::value_type
-  // else
-  //   TemporaryType = OutputIterator::value_type
-
-  typedef typename thrust::detail::eval_if<
-      thrust::detail::is_output_iterator<OutputIterator>::value,
-      thrust::iterator_value<InputIterator>,
-      thrust::iterator_value<OutputIterator>
-  >::type ValueType;
-
-  // assume plus as the associative operator
-  return thrust::inclusive_scan(exec, first, last, result, thrust::plus<ValueType>());
-} // end inclusive_scan()
-
-
-template<typename ExecutionPolicy,
-         typename InputIterator,
-         typename OutputIterator>
-  OutputIterator exclusive_scan(thrust::execution_policy<ExecutionPolicy> &exec,
-                                InputIterator first,
-                                InputIterator last,
-                                OutputIterator result)
-{
-  // the pseudocode for deducing the type of the temporary used below:
-  // 
-  // if OutputIterator is a "pure" output iterator
-  //   TemporaryType = InputIterator::value_type
-  // else
-  //   TemporaryType = OutputIterator::value_type
-
-  typedef typename thrust::detail::eval_if<
-      thrust::detail::is_output_iterator<OutputIterator>::value,
-      thrust::iterator_value<InputIterator>,
-      thrust::iterator_value<OutputIterator>
-  >::type ValueType;
-
-  // assume 0 as the initialization value
-  return thrust::exclusive_scan(exec, first, last, result, ValueType(0));
-} // end exclusive_scan()
-
-
-template<typename ExecutionPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename T>
-  OutputIterator exclusive_scan(thrust::execution_policy<ExecutionPolicy> &exec,
-                                InputIterator first,
-                                InputIterator last,
-                                OutputIterator result,
-                                T init)
-{
-  // assume plus as the associative operator
-  return thrust::exclusive_scan(exec, first, last, result, init, thrust::plus<T>());
-} // end exclusive_scan()
-
-
-template<typename ExecutionPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename BinaryFunction>
-  OutputIterator inclusive_scan(thrust::execution_policy<ExecutionPolicy> &exec,
-                                InputIterator first,
-                                InputIterator last,
-                                OutputIterator result,
-                                BinaryFunction binary_op)
-{
-  // unimplemented primitive
-  THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation<InputIterator, false>::value) );
-  return result;
-} // end inclusive_scan
-
-
-template<typename ExecutionPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename T,
-         typename BinaryFunction>
-  OutputIterator exclusive_scan(thrust::execution_policy<ExecutionPolicy> &exec,
-                                InputIterator first,
-                                InputIterator last,
-                                OutputIterator result,
-                                T init,
-                                BinaryFunction binary_op)
-{
-  // unimplemented primitive
-  THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation<InputIterator, false>::value) );
-  return result;
-} // end exclusive_scan()
-
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/generic/scan_by_key.h b/compat/thrust/system/detail/generic/scan_by_key.h
deleted file mode 100644
index 160121b58b..0000000000
--- a/compat/thrust/system/detail/generic/scan_by_key.h
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file scan_by_key.h
- *  \brief Generic implementations of key-value scans.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/tag.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator>
-  OutputIterator inclusive_scan_by_key(thrust::execution_policy<DerivedPolicy> &exec,
-                                       InputIterator1 first1,
-                                       InputIterator1 last1,
-                                       InputIterator2 first2,
-                                       OutputIterator result);
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename BinaryPredicate>
-  OutputIterator inclusive_scan_by_key(thrust::execution_policy<DerivedPolicy> &exec,
-                                       InputIterator1 first1,
-                                       InputIterator1 last1,
-                                       InputIterator2 first2,
-                                       OutputIterator result,
-                                       BinaryPredicate binary_pred);
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename BinaryPredicate,
-         typename AssociativeOperator>
-  OutputIterator inclusive_scan_by_key(thrust::execution_policy<DerivedPolicy> &exec,
-                                       InputIterator1 first1,
-                                       InputIterator1 last1,
-                                       InputIterator2 first2,
-                                       OutputIterator result,
-                                       BinaryPredicate binary_pred,
-                                       AssociativeOperator binary_op);
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator>
-  OutputIterator exclusive_scan_by_key(thrust::execution_policy<DerivedPolicy> &exec,
-                                       InputIterator1 first1,
-                                       InputIterator1 last1,
-                                       InputIterator2 first2,
-                                       OutputIterator result);
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename T>
-  OutputIterator exclusive_scan_by_key(thrust::execution_policy<DerivedPolicy> &exec,
-                                       InputIterator1 first1,
-                                       InputIterator1 last1,
-                                       InputIterator2 first2,
-                                       OutputIterator result,
-                                       T init);
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename T,
-         typename BinaryPredicate>
-  OutputIterator exclusive_scan_by_key(thrust::execution_policy<DerivedPolicy> &exec,
-                                       InputIterator1 first1,
-                                       InputIterator1 last1,
-                                       InputIterator2 first2,
-                                       OutputIterator result,
-                                       T init,
-                                       BinaryPredicate binary_pred);
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename T,
-         typename BinaryPredicate,
-         typename AssociativeOperator>
-  OutputIterator exclusive_scan_by_key(thrust::execution_policy<DerivedPolicy> &exec,
-                                       InputIterator1 first1,
-                                       InputIterator1 last1,
-                                       InputIterator2 first2,
-                                       OutputIterator result,
-                                       T init,
-                                       BinaryPredicate binary_pred,
-                                       AssociativeOperator binary_op);
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/detail/generic/scan_by_key.inl>
-
diff --git a/compat/thrust/system/detail/generic/scan_by_key.inl b/compat/thrust/system/detail/generic/scan_by_key.inl
deleted file mode 100644
index d866ddec15..0000000000
--- a/compat/thrust/system/detail/generic/scan_by_key.inl
+++ /dev/null
@@ -1,239 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/scan_by_key.h>
-#include <thrust/functional.h>
-#include <thrust/transform.h>
-#include <thrust/replace.h>
-#include <thrust/iterator/zip_iterator.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/detail/temporary_array.h>
-#include <thrust/detail/internal_functional.h>
-#include <thrust/scan.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-namespace detail
-{
-
-template <typename OutputType, typename HeadFlagType, typename AssociativeOperator>
-struct segmented_scan_functor
-{
-    AssociativeOperator binary_op;
-
-    typedef typename thrust::tuple<OutputType, HeadFlagType> result_type;
-
-    __host__ __device__
-    segmented_scan_functor(AssociativeOperator _binary_op) : binary_op(_binary_op) {}
-
-    __host__ __device__
-    result_type operator()(result_type a, result_type b)
-    {
-        return result_type(thrust::get<1>(b) ? thrust::get<0>(b) : binary_op(thrust::get<0>(a), thrust::get<0>(b)),
-                           thrust::get<1>(a) | thrust::get<1>(b));
-    }
-};
-
-} // end namespace detail
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator>
-  OutputIterator inclusive_scan_by_key(thrust::execution_policy<DerivedPolicy> &exec,
-                                       InputIterator1 first1,
-                                       InputIterator1 last1,
-                                       InputIterator2 first2,
-                                       OutputIterator result)
-{
-  typedef typename thrust::iterator_traits<InputIterator1>::value_type InputType1;
-  return thrust::inclusive_scan_by_key(exec, first1, last1, first2, result, thrust::equal_to<InputType1>());
-}
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename BinaryPredicate>
-  OutputIterator inclusive_scan_by_key(thrust::execution_policy<DerivedPolicy> &exec,
-                                       InputIterator1 first1,
-                                       InputIterator1 last1,
-                                       InputIterator2 first2,
-                                       OutputIterator result,
-                                       BinaryPredicate binary_pred)
-{
-  typedef typename thrust::iterator_traits<OutputIterator>::value_type OutputType;
-  return thrust::inclusive_scan_by_key(exec, first1, last1, first2, result, binary_pred, thrust::plus<OutputType>());
-}
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename BinaryPredicate,
-         typename AssociativeOperator>
-  OutputIterator inclusive_scan_by_key(thrust::execution_policy<DerivedPolicy> &exec,
-                                       InputIterator1 first1,
-                                       InputIterator1 last1,
-                                       InputIterator2 first2,
-                                       OutputIterator result,
-                                       BinaryPredicate binary_pred,
-                                       AssociativeOperator binary_op)
-{
-  typedef typename thrust::iterator_traits<OutputIterator>::value_type OutputType;
-  typedef unsigned int HeadFlagType;
-
-  const size_t n = last1 - first1;
-
-  if(n != 0)
-  {
-    // compute head flags
-    thrust::detail::temporary_array<HeadFlagType,DerivedPolicy> flags(exec, n);
-    flags[0] = 1; thrust::transform(exec, first1, last1 - 1, first1 + 1, flags.begin() + 1, thrust::detail::not2(binary_pred));
-
-    // scan key-flag tuples, 
-    // For additional details refer to Section 2 of the following paper
-    //    S. Sengupta, M. Harris, and M. Garland. "Efficient parallel scan algorithms for GPUs"
-    //    NVIDIA Technical Report NVR-2008-003, December 2008
-    //    http://mgarland.org/files/papers/nvr-2008-003.pdf
-    thrust::inclusive_scan
-        (exec,
-         thrust::make_zip_iterator(thrust::make_tuple(first2, flags.begin())),
-         thrust::make_zip_iterator(thrust::make_tuple(first2, flags.begin())) + n,
-         thrust::make_zip_iterator(thrust::make_tuple(result, flags.begin())),
-         detail::segmented_scan_functor<OutputType, HeadFlagType, AssociativeOperator>(binary_op));
-  }
-
-  return result + n;
-}
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator>
-  OutputIterator exclusive_scan_by_key(thrust::execution_policy<DerivedPolicy> &exec,
-                                       InputIterator1 first1,
-                                       InputIterator1 last1,
-                                       InputIterator2 first2,
-                                       OutputIterator result)
-{
-  typedef typename thrust::iterator_traits<OutputIterator>::value_type OutputType;
-  return thrust::exclusive_scan_by_key(exec, first1, last1, first2, result, OutputType(0));
-}
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename T>
-  OutputIterator exclusive_scan_by_key(thrust::execution_policy<DerivedPolicy> &exec,
-                                       InputIterator1 first1,
-                                       InputIterator1 last1,
-                                       InputIterator2 first2,
-                                       OutputIterator result,
-                                       T init)
-{
-  typedef typename thrust::iterator_traits<InputIterator1>::value_type InputType1;
-  return thrust::exclusive_scan_by_key(exec, first1, last1, first2, result, init, thrust::equal_to<InputType1>());
-}
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename T,
-         typename BinaryPredicate>
-  OutputIterator exclusive_scan_by_key(thrust::execution_policy<DerivedPolicy> &exec,
-                                       InputIterator1 first1,
-                                       InputIterator1 last1,
-                                       InputIterator2 first2,
-                                       OutputIterator result,
-                                       T init,
-                                       BinaryPredicate binary_pred)
-{
-  typedef typename thrust::iterator_traits<OutputIterator>::value_type OutputType;
-  return thrust::exclusive_scan_by_key(exec, first1, last1, first2, result, init, binary_pred, thrust::plus<OutputType>());
-}
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename T,
-         typename BinaryPredicate,
-         typename AssociativeOperator>
-  OutputIterator exclusive_scan_by_key(thrust::execution_policy<DerivedPolicy> &exec,
-                                       InputIterator1 first1,
-                                       InputIterator1 last1,
-                                       InputIterator2 first2,
-                                       OutputIterator result,
-                                       T init,
-                                       BinaryPredicate binary_pred,
-                                       AssociativeOperator binary_op)
-{
-  typedef typename thrust::iterator_traits<OutputIterator>::value_type OutputType;
-  typedef unsigned int HeadFlagType;
-
-  const size_t n = last1 - first1;
-
-  if(n != 0)
-  {
-    InputIterator2 last2 = first2 + n;
-
-    // compute head flags
-    thrust::detail::temporary_array<HeadFlagType,DerivedPolicy> flags(exec, n);
-    flags[0] = 1; thrust::transform(exec, first1, last1 - 1, first1 + 1, flags.begin() + 1, thrust::detail::not2(binary_pred));
-
-    // shift input one to the right and initialize segments with init
-    thrust::detail::temporary_array<OutputType,DerivedPolicy> temp(exec, n);
-    thrust::replace_copy_if(exec, first2, last2 - 1, flags.begin() + 1, temp.begin() + 1, thrust::negate<HeadFlagType>(), init);
-    temp[0] = init;
-
-    // scan key-flag tuples, 
-    // For additional details refer to Section 2 of the following paper
-    //    S. Sengupta, M. Harris, and M. Garland. "Efficient parallel scan algorithms for GPUs"
-    //    NVIDIA Technical Report NVR-2008-003, December 2008
-    //    http://mgarland.org/files/papers/nvr-2008-003.pdf
-    thrust::inclusive_scan(exec,
-                           thrust::make_zip_iterator(thrust::make_tuple(temp.begin(), flags.begin())),
-                           thrust::make_zip_iterator(thrust::make_tuple(temp.begin(), flags.begin())) + n,
-                           thrust::make_zip_iterator(thrust::make_tuple(result,       flags.begin())),
-                           detail::segmented_scan_functor<OutputType, HeadFlagType, AssociativeOperator>(binary_op));
-  }
-
-  return result + n;
-}
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/generic/scatter.h b/compat/thrust/system/detail/generic/scatter.h
deleted file mode 100644
index 858d11adc1..0000000000
--- a/compat/thrust/system/detail/generic/scatter.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/tag.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename RandomAccessIterator>
-  void scatter(thrust::execution_policy<DerivedPolicy> &exec,
-               InputIterator1 first,
-               InputIterator1 last,
-               InputIterator2 map,
-               RandomAccessIterator output);
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename RandomAccessIterator>
-  void scatter_if(thrust::execution_policy<DerivedPolicy> &exec,
-                  InputIterator1 first,
-                  InputIterator1 last,
-                  InputIterator2 map,
-                  InputIterator3 stencil,
-                  RandomAccessIterator output);
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename RandomAccessIterator,
-         typename Predicate>
-  void scatter_if(thrust::execution_policy<DerivedPolicy> &exec,
-                  InputIterator1 first,
-                  InputIterator1 last,
-                  InputIterator2 map,
-                  InputIterator3 stencil,
-                  RandomAccessIterator output,
-                  Predicate pred);
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/detail/generic/scatter.inl>
-
diff --git a/compat/thrust/system/detail/generic/scatter.inl b/compat/thrust/system/detail/generic/scatter.inl
deleted file mode 100644
index 8c40359844..0000000000
--- a/compat/thrust/system/detail/generic/scatter.inl
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/scatter.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/functional.h>
-#include <thrust/transform.h>
-#include <thrust/iterator/permutation_iterator.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename RandomAccessIterator>
-  void scatter(thrust::execution_policy<DerivedPolicy> &exec,
-               InputIterator1 first,
-               InputIterator1 last,
-               InputIterator2 map,
-               RandomAccessIterator output)
-{
-  thrust::transform(exec,
-                    first,
-                    last,
-                    thrust::make_permutation_iterator(output, map),
-                    thrust::identity<typename thrust::iterator_value<InputIterator1>::type>());
-} // end scatter()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename RandomAccessIterator>
-  void scatter_if(thrust::execution_policy<DerivedPolicy> &exec,
-                  InputIterator1 first,
-                  InputIterator1 last,
-                  InputIterator2 map,
-                  InputIterator3 stencil,
-                  RandomAccessIterator output)
-{
-  // default predicate is identity
-  typedef typename thrust::iterator_value<InputIterator3>::type StencilType;
-  thrust::scatter_if(exec, first, last, map, stencil, output, thrust::identity<StencilType>());
-} // end scatter_if()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename RandomAccessIterator,
-         typename Predicate>
-  void scatter_if(thrust::execution_policy<DerivedPolicy> &exec,
-                  InputIterator1 first,
-                  InputIterator1 last,
-                  InputIterator2 map,
-                  InputIterator3 stencil,
-                  RandomAccessIterator output,
-                  Predicate pred)
-{
-  typedef typename thrust::iterator_value<InputIterator1>::type InputType;
-  thrust::transform_if(exec, first, last, stencil, thrust::make_permutation_iterator(output, map), thrust::identity<InputType>(), pred);
-} // end scatter_if()
-
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/generic/select_system.h b/compat/thrust/system/detail/generic/select_system.h
deleted file mode 100644
index 250a0bce44..0000000000
--- a/compat/thrust/system/detail/generic/select_system.h
+++ /dev/null
@@ -1,182 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/execution_policy.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/detail/type_traits/is_metafunction_defined.h>
-#include <thrust/iterator/detail/minimum_system.h>
-#include <thrust/system/detail/generic/type_traits.h>
-#include <thrust/iterator/iterator_traits.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-namespace select_system_detail
-{
-
-
-// min_system case 1: both systems have the same type, just return the first one
-template<typename System>
-__host__ __device__
-System &min_system(thrust::execution_policy<System> &system1,
-                   thrust::execution_policy<System> &)
-{
-  return thrust::detail::derived_cast(system1);
-} // end min_system()
-
-
-// min_system case 2: systems have differing type and the first type is considered the minimum
-template<typename System1, typename System2>
-__host__ __device__
-  typename thrust::detail::enable_if<
-    thrust::detail::is_same<
-      System1,
-      typename thrust::detail::minimum_system<System1,System2>::type
-    >::value,
-    System1 &
-  >::type
-    min_system(thrust::execution_policy<System1> &system1, thrust::execution_policy<System2> &)
-{
-  return thrust::detail::derived_cast(system1);
-} // end min_system()
-
-
-// min_system case 3: systems have differing type and the second type is considered the minimum
-template<typename System1, typename System2>
-__host__ __device__
-  typename thrust::detail::enable_if<
-    thrust::detail::is_same<
-      System2,
-      typename thrust::detail::minimum_system<System1,System2>::type
-    >::value,
-    System2 &
-  >::type
-    min_system(thrust::execution_policy<System1> &, thrust::execution_policy<System2> &system2)
-{
-  return thrust::detail::derived_cast(system2);
-} // end min_system()
-
-
-} // end select_system_detail
-
-
-template<typename System>
-__host__ __device__
-  typename thrust::detail::disable_if<
-    select_system1_exists<System>::value,
-    System &
-  >::type
-    select_system(thrust::execution_policy<System> &system)
-{
-  return thrust::detail::derived_cast(system);
-} // end select_system()
-
-
-template<typename System1, typename System2>
-__host__ __device__
-  typename thrust::detail::enable_if_defined<
-    thrust::detail::minimum_system<System1,System2>
-  >::type
-    &select_system(thrust::execution_policy<System1> &system1,
-                   thrust::execution_policy<System2> &system2)
-{
-  return select_system_detail::min_system(system1,system2);
-} // end select_system()
-
-
-template<typename System1, typename System2, typename System3>
-__host__ __device__
-  typename thrust::detail::lazy_disable_if<
-    select_system3_exists<System1,System2,System3>::value,
-    thrust::detail::minimum_system<System1,System2,System3>
-  >::type
-    &select_system(thrust::execution_policy<System1> &system1,
-                   thrust::execution_policy<System2> &system2,
-                   thrust::execution_policy<System3> &system3)
-{
-  return select_system(select_system(system1,system2), system3);
-} // end select_system()
-
-
-template<typename System1, typename System2, typename System3, typename System4>
-__host__ __device__
-  typename thrust::detail::lazy_disable_if<
-    select_system4_exists<System1,System2,System3,System4>::value,
-    thrust::detail::minimum_system<System1,System2,System3,System4>
-  >::type
-    &select_system(thrust::execution_policy<System1> &system1,
-                   thrust::execution_policy<System2> &system2,
-                   thrust::execution_policy<System3> &system3,
-                   thrust::execution_policy<System4> &system4)
-{
-  return select_system(select_system(system1,system2,system3), system4);
-} // end select_system()
-
-
-template<typename System1, typename System2, typename System3, typename System4, typename System5>
-__host__ __device__
-  typename thrust::detail::lazy_disable_if<
-    select_system5_exists<System1,System2,System3,System4,System5>::value,
-    thrust::detail::minimum_system<System1,System2,System3,System4,System5>
-  >::type
-    &select_system(thrust::execution_policy<System1> &system1,
-                   thrust::execution_policy<System2> &system2,
-                   thrust::execution_policy<System3> &system3,
-                   thrust::execution_policy<System4> &system4,
-                   thrust::execution_policy<System5> &system5)
-{
-  return select_system(select_system(system1,system2,system3,system4), system5);
-} // end select_system()
-
-
-template<typename System1, typename System2, typename System3, typename System4, typename System5, typename System6>
-__host__ __device__
-  typename thrust::detail::lazy_disable_if<
-    select_system6_exists<System1,System2,System3,System4,System5,System6>::value,
-    thrust::detail::minimum_system<System1,System2,System3,System4,System5,System6>
-  >::type
-    &select_system(thrust::execution_policy<System1> &system1,
-                   thrust::execution_policy<System2> &system2,
-                   thrust::execution_policy<System3> &system3,
-                   thrust::execution_policy<System4> &system4,
-                   thrust::execution_policy<System5> &system5,
-                   thrust::execution_policy<System6> &system6)
-{
-  return select_system(select_system(system1,system2,system3,system4,system5), system6);
-} // end select_system()
-
-
-// map a single any_system_tag to device_system_tag
-inline __host__ __device__
-thrust::device_system_tag select_system(thrust::any_system_tag)
-{
-  return thrust::device_system_tag();
-} // end select_system()
-
-
-} // end generic
-} // end detail
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/detail/generic/sequence.h b/compat/thrust/system/detail/generic/sequence.h
deleted file mode 100644
index b23a7b5d22..0000000000
--- a/compat/thrust/system/detail/generic/sequence.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/tag.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-
-template<typename DerivedPolicy,
-         typename ForwardIterator>
-  void sequence(thrust::execution_policy<DerivedPolicy> &exec,
-                ForwardIterator first,
-                ForwardIterator last);
-
-
-template<typename DerivedPolicy, typename ForwardIterator, typename T>
-  void sequence(thrust::execution_policy<DerivedPolicy> &exec,
-                ForwardIterator first,
-                ForwardIterator last,
-                T init);
-
-
-template<typename DerivedPolicy, typename ForwardIterator, typename T>
-  void sequence(thrust::execution_policy<DerivedPolicy> &exec,
-                ForwardIterator first,
-                ForwardIterator last,
-                T init,
-                T step);
-
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/detail/generic/sequence.inl>
-
diff --git a/compat/thrust/system/detail/generic/sequence.inl b/compat/thrust/system/detail/generic/sequence.inl
deleted file mode 100644
index 45aec69829..0000000000
--- a/compat/thrust/system/detail/generic/sequence.inl
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/sequence.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/functional.h>
-#include <thrust/tabulate.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-
-template<typename DerivedPolicy, typename ForwardIterator>
-  void sequence(thrust::execution_policy<DerivedPolicy> &exec,
-                ForwardIterator first,
-                ForwardIterator last)
-{
-  typedef typename thrust::iterator_traits<ForwardIterator>::value_type T;
-
-  thrust::sequence(exec, first, last, T(0));
-} // end sequence()
-
-
-template<typename DerivedPolicy, typename ForwardIterator, typename T>
-  void sequence(thrust::execution_policy<DerivedPolicy> &exec,
-                ForwardIterator first,
-                ForwardIterator last,
-                T init)
-{
-  thrust::sequence(exec, first, last, init, T(1));
-} // end sequence()
-
-
-template<typename DerivedPolicy, typename ForwardIterator, typename T>
-  void sequence(thrust::execution_policy<DerivedPolicy> &exec,
-                ForwardIterator first,
-                ForwardIterator last,
-                T init,
-                T step)
-{
-  thrust::tabulate(exec, first, last, init + step * thrust::placeholders::_1);
-} // end sequence()
-
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/generic/set_operations.h b/compat/thrust/system/detail/generic/set_operations.h
deleted file mode 100644
index 1ca8d391de..0000000000
--- a/compat/thrust/system/detail/generic/set_operations.h
+++ /dev/null
@@ -1,303 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/tag.h>
-#include <thrust/pair.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-
-template<typename ExecutionPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator>
-  OutputIterator set_difference(thrust::execution_policy<ExecutionPolicy> &exec,
-                                InputIterator1                             first1,
-                                InputIterator1                             last1,
-                                InputIterator2                             first2,
-                                InputIterator2                             last2,
-                                OutputIterator                             result);
-
-
-// XXX it is an error to call this function; it has no implementation
-template<typename ExecutionPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename StrictWeakOrdering>
-  OutputIterator set_difference(thrust::execution_policy<ExecutionPolicy> &exec,
-                                InputIterator1                             first1,
-                                InputIterator1                             last1,
-                                InputIterator2                             first2,
-                                InputIterator2                             last2,
-                                OutputIterator                             result,
-                                StrictWeakOrdering                         comp);
-
-
-template<typename ExecutionPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename InputIterator4,
-         typename OutputIterator1,
-         typename OutputIterator2>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    set_difference_by_key(thrust::execution_policy<ExecutionPolicy> &exec,
-                          InputIterator1                             keys_first1,
-                          InputIterator1                             keys_last1,
-                          InputIterator2                             keys_first2,
-                          InputIterator2                             keys_last2,
-                          InputIterator3                             values_first1,
-                          InputIterator4                             values_first2,
-                          OutputIterator1                            keys_result,
-                          OutputIterator2                            values_result);
-
-
-template<typename ExecutionPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename InputIterator4,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename StrictWeakOrdering>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    set_difference_by_key(thrust::execution_policy<ExecutionPolicy> &exec,
-                          InputIterator1                             keys_first1,
-                          InputIterator1                             keys_last1,
-                          InputIterator2                             keys_first2,
-                          InputIterator2                             keys_last2,
-                          InputIterator3                             values_first1,
-                          InputIterator4                             values_first2,
-                          OutputIterator1                            keys_result,
-                          OutputIterator2                            values_result,
-                          StrictWeakOrdering                         comp);
-
-
-template<typename ExecutionPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator>
-  OutputIterator set_intersection(thrust::execution_policy<ExecutionPolicy> &system,
-                                  InputIterator1                             first1,
-                                  InputIterator1                             last1,
-                                  InputIterator2                             first2,
-                                  InputIterator2                             last2,
-                                  OutputIterator                             result);
-
-
-// XXX it is an error to call this function; it has no implementation
-template<typename ExecutionPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename StrictWeakOrdering>
-  OutputIterator set_intersection(thrust::execution_policy<StrictWeakOrdering> &system,
-                                  InputIterator1                                first1,
-                                  InputIterator1                                last1,
-                                  InputIterator2                                first2,
-                                  InputIterator2                                last2,
-                                  OutputIterator                                result,
-                                  StrictWeakOrdering                            comp);
-
-
-template<typename ExecutionPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename OutputIterator1,
-         typename OutputIterator2>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    set_intersection_by_key(thrust::execution_policy<ExecutionPolicy> &system,
-                            InputIterator1                             keys_first1,
-                            InputIterator1                             keys_last1,
-                            InputIterator2                             keys_first2,
-                            InputIterator2                             keys_last2,
-                            InputIterator3                             values_first1,
-                            OutputIterator1                            keys_result,
-                            OutputIterator2                            values_result);
-
-
-template<typename ExecutionPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename StrictWeakOrdering>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    set_intersection_by_key(thrust::execution_policy<ExecutionPolicy> &system,
-                            InputIterator1                             keys_first1,
-                            InputIterator1                             keys_last1,
-                            InputIterator2                             keys_first2,
-                            InputIterator2                             keys_last2,
-                            InputIterator3                             values_first1,
-                            OutputIterator1                            keys_result,
-                            OutputIterator2                            values_result,
-                            StrictWeakOrdering                         comp);
-
-
-template<typename ExecutionPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator>
-  OutputIterator set_symmetric_difference(thrust::execution_policy<ExecutionPolicy> &system,
-                                          InputIterator1                             first1,
-                                          InputIterator1                             last1,
-                                          InputIterator2                             first2,
-                                          InputIterator2                             last2,
-                                          OutputIterator                             result);
-
-
-// XXX it is an error to call this function; it has no implementation
-template<typename ExecutionPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename StrictWeakOrdering>
-  OutputIterator set_symmetric_difference(thrust::execution_policy<ExecutionPolicy> &system,
-                                          InputIterator1                             first1,
-                                          InputIterator1                             last1,
-                                          InputIterator2                             first2,
-                                          InputIterator2                             last2,
-                                          OutputIterator                             result,
-                                          StrictWeakOrdering                         comp);
-
-
-template<typename ExecutionPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename InputIterator4,
-         typename OutputIterator1,
-         typename OutputIterator2>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    set_symmetric_difference_by_key(thrust::execution_policy<ExecutionPolicy> &system,
-                                    InputIterator1                             keys_first1,
-                                    InputIterator1                             keys_last1,
-                                    InputIterator2                             keys_first2,
-                                    InputIterator2                             keys_last2,
-                                    InputIterator3                             values_first1,
-                                    InputIterator4                             values_first2,
-                                    OutputIterator1                            keys_result,
-                                    OutputIterator2                            values_result);
-
-
-template<typename ExecutionPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename InputIterator4,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename StrictWeakOrdering>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    set_symmetric_difference_by_key(thrust::execution_policy<ExecutionPolicy> &system,
-                                    InputIterator1                             keys_first1,
-                                    InputIterator1                             keys_last1,
-                                    InputIterator2                             keys_first2,
-                                    InputIterator2                             keys_last2,
-                                    InputIterator3                             values_first1,
-                                    InputIterator4                             values_first2,
-                                    OutputIterator1                            keys_result,
-                                    OutputIterator2                            values_result,
-                                    StrictWeakOrdering                         comp);
-
-
-template<typename ExecutionPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator>
-  OutputIterator set_union(thrust::execution_policy<ExecutionPolicy> &system,
-                           InputIterator1                             first1,
-                           InputIterator1                             last1,
-                           InputIterator2                             first2,
-                           InputIterator2                             last2,
-                           OutputIterator                             result);
-
-
-// XXX it is an error to call this function; it has no implementation
-template<typename ExecutionPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename StrictWeakOrdering>
-  OutputIterator set_union(thrust::execution_policy<ExecutionPolicy> &system,
-                           InputIterator1                             first1,
-                           InputIterator1                             last1,
-                           InputIterator2                             first2,
-                           InputIterator2                             last2,
-                           OutputIterator                             result,
-                           StrictWeakOrdering                         comp);
-
-
-template<typename ExecutionPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename InputIterator4,
-         typename OutputIterator1,
-         typename OutputIterator2>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    set_union_by_key(thrust::execution_policy<ExecutionPolicy> &system,
-                     InputIterator1                             keys_first1,
-                     InputIterator1                             keys_last1,
-                     InputIterator2                             keys_first2,
-                     InputIterator2                             keys_last2,
-                     InputIterator3                             values_first1,
-                     InputIterator4                             values_first2,
-                     OutputIterator1                            keys_result,
-                     OutputIterator2                            values_result);
-
-
-template<typename ExecutionPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename InputIterator4,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename StrictWeakOrdering>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    set_union_by_key(thrust::execution_policy<ExecutionPolicy> &system,
-                     InputIterator1                             keys_first1,
-                     InputIterator1                             keys_last1,
-                     InputIterator2                             keys_first2,
-                     InputIterator2                             keys_last2,
-                     InputIterator3                             values_first1,
-                     InputIterator4                             values_first2,
-                     OutputIterator1                            keys_result,
-                     OutputIterator2                            values_result,
-                     StrictWeakOrdering                         comp);
-
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/detail/generic/set_operations.inl>
-
diff --git a/compat/thrust/system/detail/generic/set_operations.inl b/compat/thrust/system/detail/generic/set_operations.inl
deleted file mode 100644
index bac9ccd671..0000000000
--- a/compat/thrust/system/detail/generic/set_operations.inl
+++ /dev/null
@@ -1,449 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/static_assert.h>
-#include <thrust/system/detail/generic/set_operations.h>
-#include <thrust/functional.h>
-#include <thrust/detail/internal_functional.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/iterator/zip_iterator.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator>
-  OutputIterator set_difference(thrust::execution_policy<DerivedPolicy> &exec,
-                                InputIterator1                           first1,
-                                InputIterator1                           last1,
-                                InputIterator2                           first2,
-                                InputIterator2                           last2,
-                                OutputIterator                           result)
-{
-  typedef typename thrust::iterator_value<InputIterator1>::type value_type;
-  return thrust::set_difference(exec, first1, last1, first2, last2, result, thrust::less<value_type>());
-} // end set_difference()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename InputIterator4,
-         typename OutputIterator1,
-         typename OutputIterator2>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    set_difference_by_key(thrust::execution_policy<DerivedPolicy> &exec,
-                          InputIterator1                           keys_first1,
-                          InputIterator1                           keys_last1,
-                          InputIterator2                           keys_first2,
-                          InputIterator2                           keys_last2,
-                          InputIterator3                           values_first1,
-                          InputIterator4                           values_first2,
-                          OutputIterator1                          keys_result,
-                          OutputIterator2                          values_result)
-{
-  typedef typename thrust::iterator_value<InputIterator1>::type value_type;
-  return thrust::set_difference_by_key(exec, keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result, thrust::less<value_type>());
-} // end set_difference_by_key()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename InputIterator4,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename StrictWeakOrdering>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    set_difference_by_key(thrust::execution_policy<DerivedPolicy> &exec,
-                          InputIterator1                           keys_first1,
-                          InputIterator1                           keys_last1,
-                          InputIterator2                           keys_first2,
-                          InputIterator2                           keys_last2,
-                          InputIterator3                           values_first1,
-                          InputIterator4                           values_first2,
-                          OutputIterator1                          keys_result,
-                          OutputIterator2                          values_result,
-                          StrictWeakOrdering                       comp)
-{
-  typedef thrust::tuple<InputIterator1, InputIterator3>   iterator_tuple1;
-  typedef thrust::tuple<InputIterator2, InputIterator4>   iterator_tuple2;
-  typedef thrust::tuple<OutputIterator1, OutputIterator2> iterator_tuple3;
-
-  typedef thrust::zip_iterator<iterator_tuple1> zip_iterator1;
-  typedef thrust::zip_iterator<iterator_tuple2> zip_iterator2;
-  typedef thrust::zip_iterator<iterator_tuple3> zip_iterator3;
-
-  zip_iterator1 zipped_first1 = thrust::make_zip_iterator(thrust::make_tuple(keys_first1, values_first1));
-  zip_iterator1 zipped_last1  = thrust::make_zip_iterator(thrust::make_tuple(keys_last1, values_first1));
-
-  zip_iterator2 zipped_first2 = thrust::make_zip_iterator(thrust::make_tuple(keys_first2, values_first2));
-  zip_iterator2 zipped_last2  = thrust::make_zip_iterator(thrust::make_tuple(keys_last2, values_first2));
-
-  zip_iterator3 zipped_result = thrust::make_zip_iterator(thrust::make_tuple(keys_result, values_result));
-
-  thrust::detail::compare_first<StrictWeakOrdering> comp_first(comp);
-
-  iterator_tuple3 result = thrust::set_difference(exec, zipped_first1, zipped_last1, zipped_first2, zipped_last2, zipped_result, comp_first).get_iterator_tuple();
-
-  return thrust::make_pair(thrust::get<0>(result), thrust::get<1>(result));
-} // end set_difference_by_key()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator>
-  OutputIterator set_intersection(thrust::execution_policy<DerivedPolicy> &exec,
-                                  InputIterator1                           first1,
-                                  InputIterator1                           last1,
-                                  InputIterator2                           first2,
-                                  InputIterator2                           last2,
-                                  OutputIterator                           result)
-{
-  typedef typename thrust::iterator_value<InputIterator1>::type value_type;
-  return thrust::set_intersection(exec, first1, last1, first2, last2, result, thrust::less<value_type>());
-} // end set_intersection()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename OutputIterator1,
-         typename OutputIterator2>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    set_intersection_by_key(thrust::execution_policy<DerivedPolicy> &exec,
-                            InputIterator1                           keys_first1,
-                            InputIterator1                           keys_last1,
-                            InputIterator2                           keys_first2,
-                            InputIterator2                           keys_last2,
-                            InputIterator3                           values_first1,
-                            OutputIterator1                          keys_result,
-                            OutputIterator2                          values_result)
-{
-  typedef typename thrust::iterator_value<InputIterator1>::type value_type;
-  return thrust::set_intersection_by_key(exec, keys_first1, keys_last1, keys_first2, keys_last2, values_first1, keys_result, values_result, thrust::less<value_type>());
-} // end set_intersection_by_key()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename StrictWeakOrdering>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    set_intersection_by_key(thrust::execution_policy<DerivedPolicy> &exec,
-                            InputIterator1                           keys_first1,
-                            InputIterator1                           keys_last1,
-                            InputIterator2                           keys_first2,
-                            InputIterator2                           keys_last2,
-                            InputIterator3                           values_first1,
-                            OutputIterator1                          keys_result,
-                            OutputIterator2                          values_result,
-                            StrictWeakOrdering                       comp)
-{
-  typedef thrust::tuple<InputIterator1, InputIterator3>   iterator_tuple1;
-  typedef thrust::tuple<InputIterator2, InputIterator2>   iterator_tuple2;
-  typedef thrust::tuple<OutputIterator1, OutputIterator2> iterator_tuple3;
-
-  typedef thrust::zip_iterator<iterator_tuple1> zip_iterator1;
-  typedef thrust::zip_iterator<iterator_tuple2> zip_iterator2;
-  typedef thrust::zip_iterator<iterator_tuple3> zip_iterator3;
-
-  // fabricate a values_first2 by "sending" keys twice
-  // it should never be dereferenced by set_intersection
-  InputIterator2 values_first2 = keys_first2;
-
-  zip_iterator1 zipped_first1 = thrust::make_zip_iterator(thrust::make_tuple(keys_first1, values_first1));
-  zip_iterator1 zipped_last1  = thrust::make_zip_iterator(thrust::make_tuple(keys_last1, values_first1));
-
-  zip_iterator2 zipped_first2 = thrust::make_zip_iterator(thrust::make_tuple(keys_first2, values_first2));
-  zip_iterator2 zipped_last2  = thrust::make_zip_iterator(thrust::make_tuple(keys_last2, values_first2));
-
-  zip_iterator3 zipped_result = thrust::make_zip_iterator(thrust::make_tuple(keys_result, values_result));
-
-  thrust::detail::compare_first<StrictWeakOrdering> comp_first(comp);
-
-  iterator_tuple3 result = thrust::set_intersection(exec, zipped_first1, zipped_last1, zipped_first2, zipped_last2, zipped_result, comp_first).get_iterator_tuple();
-
-  return thrust::make_pair(thrust::get<0>(result), thrust::get<1>(result));
-} // end set_intersection_by_key()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator>
-  OutputIterator set_symmetric_difference(thrust::execution_policy<DerivedPolicy> &exec,
-                                          InputIterator1                           first1,
-                                          InputIterator1                           last1,
-                                          InputIterator2                           first2,
-                                          InputIterator2                           last2,
-                                          OutputIterator                           result)
-{
-  typedef typename thrust::iterator_value<InputIterator1>::type value_type;
-  return thrust::set_symmetric_difference(exec, first1, last1, first2, last2, result, thrust::less<value_type>());
-} // end set_symmetric_difference()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename InputIterator4,
-         typename OutputIterator1,
-         typename OutputIterator2>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    set_symmetric_difference_by_key(thrust::execution_policy<DerivedPolicy> &exec,
-                                    InputIterator1                           keys_first1,
-                                    InputIterator1                           keys_last1,
-                                    InputIterator2                           keys_first2,
-                                    InputIterator2                           keys_last2,
-                                    InputIterator3                           values_first1,
-                                    InputIterator4                           values_first2,
-                                    OutputIterator1                          keys_result,
-                                    OutputIterator2                          values_result)
-{
-  typedef typename thrust::iterator_value<InputIterator1>::type value_type;
-  return thrust::set_symmetric_difference_by_key(exec, keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result, thrust::less<value_type>());
-} // end set_symmetric_difference_by_key()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename InputIterator4,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename StrictWeakOrdering>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    set_symmetric_difference_by_key(thrust::execution_policy<DerivedPolicy> &exec,
-                                    InputIterator1                           keys_first1,
-                                    InputIterator1                           keys_last1,
-                                    InputIterator2                           keys_first2,
-                                    InputIterator2                           keys_last2,
-                                    InputIterator3                           values_first1,
-                                    InputIterator4                           values_first2,
-                                    OutputIterator1                          keys_result,
-                                    OutputIterator2                          values_result,
-                                    StrictWeakOrdering                       comp)
-{
-  typedef thrust::tuple<InputIterator1, InputIterator3>   iterator_tuple1;
-  typedef thrust::tuple<InputIterator2, InputIterator4>   iterator_tuple2;
-  typedef thrust::tuple<OutputIterator1, OutputIterator2> iterator_tuple3;
-
-  typedef thrust::zip_iterator<iterator_tuple1> zip_iterator1;
-  typedef thrust::zip_iterator<iterator_tuple2> zip_iterator2;
-  typedef thrust::zip_iterator<iterator_tuple3> zip_iterator3;
-
-  zip_iterator1 zipped_first1 = thrust::make_zip_iterator(thrust::make_tuple(keys_first1, values_first1));
-  zip_iterator1 zipped_last1  = thrust::make_zip_iterator(thrust::make_tuple(keys_last1, values_first1));
-
-  zip_iterator2 zipped_first2 = thrust::make_zip_iterator(thrust::make_tuple(keys_first2, values_first2));
-  zip_iterator2 zipped_last2  = thrust::make_zip_iterator(thrust::make_tuple(keys_last2, values_first2));
-
-  zip_iterator3 zipped_result = thrust::make_zip_iterator(thrust::make_tuple(keys_result, values_result));
-
-  thrust::detail::compare_first<StrictWeakOrdering> comp_first(comp);
-
-  iterator_tuple3 result = thrust::set_symmetric_difference(exec, zipped_first1, zipped_last1, zipped_first2, zipped_last2, zipped_result, comp_first).get_iterator_tuple();
-
-  return thrust::make_pair(thrust::get<0>(result), thrust::get<1>(result));
-} // end set_symmetric_difference_by_key()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator>
-  OutputIterator set_union(thrust::execution_policy<DerivedPolicy> &exec,
-                           InputIterator1                           first1,
-                           InputIterator1                           last1,
-                           InputIterator2                           first2,
-                           InputIterator2                           last2,
-                           OutputIterator                           result)
-{
-  typedef typename thrust::iterator_value<InputIterator1>::type value_type;
-  return thrust::set_union(exec, first1, last1, first2, last2, result, thrust::less<value_type>());
-} // end set_union()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename InputIterator4,
-         typename OutputIterator1,
-         typename OutputIterator2>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    set_union_by_key(thrust::execution_policy<DerivedPolicy> &exec,
-                     InputIterator1                           keys_first1,
-                     InputIterator1                           keys_last1,
-                     InputIterator2                           keys_first2,
-                     InputIterator2                           keys_last2,
-                     InputIterator3                           values_first1,
-                     InputIterator4                           values_first2,
-                     OutputIterator1                          keys_result,
-                     OutputIterator2                          values_result)
-{
-  typedef typename thrust::iterator_value<InputIterator1>::type value_type;
-  return thrust::set_union_by_key(exec, keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result, thrust::less<value_type>());
-} // end set_union_by_key()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename InputIterator4,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename StrictWeakOrdering>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    set_union_by_key(thrust::execution_policy<DerivedPolicy> &exec,
-                     InputIterator1                           keys_first1,
-                     InputIterator1                           keys_last1,
-                     InputIterator2                           keys_first2,
-                     InputIterator2                           keys_last2,
-                     InputIterator3                           values_first1,
-                     InputIterator4                           values_first2,
-                     OutputIterator1                          keys_result,
-                     OutputIterator2                          values_result,
-                     StrictWeakOrdering                       comp)
-{
-  typedef thrust::tuple<InputIterator1, InputIterator3>   iterator_tuple1;
-  typedef thrust::tuple<InputIterator2, InputIterator4>   iterator_tuple2;
-  typedef thrust::tuple<OutputIterator1, OutputIterator2> iterator_tuple3;
-
-  typedef thrust::zip_iterator<iterator_tuple1> zip_iterator1;
-  typedef thrust::zip_iterator<iterator_tuple2> zip_iterator2;
-  typedef thrust::zip_iterator<iterator_tuple3> zip_iterator3;
-
-  zip_iterator1 zipped_first1 = thrust::make_zip_iterator(thrust::make_tuple(keys_first1, values_first1));
-  zip_iterator1 zipped_last1  = thrust::make_zip_iterator(thrust::make_tuple(keys_last1, values_first1));
-
-  zip_iterator2 zipped_first2 = thrust::make_zip_iterator(thrust::make_tuple(keys_first2, values_first2));
-  zip_iterator2 zipped_last2  = thrust::make_zip_iterator(thrust::make_tuple(keys_last2, values_first2));
-
-  zip_iterator3 zipped_result = thrust::make_zip_iterator(thrust::make_tuple(keys_result, values_result));
-
-  thrust::detail::compare_first<StrictWeakOrdering> comp_first(comp);
-
-  iterator_tuple3 result = thrust::set_union(exec, zipped_first1, zipped_last1, zipped_first2, zipped_last2, zipped_result, comp_first).get_iterator_tuple();
-
-  return thrust::make_pair(thrust::get<0>(result), thrust::get<1>(result));
-} // end set_union_by_key()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename StrictWeakOrdering>
-  OutputIterator set_difference(thrust::execution_policy<DerivedPolicy> &exec,
-                                InputIterator1                           first1,
-                                InputIterator1                           last1,
-                                InputIterator2                           first2,
-                                InputIterator2                           last2,
-                                OutputIterator                           result,
-                                StrictWeakOrdering                       comp)
-{
-  // unimplemented primitive
-  THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation<InputIterator1, false>::value) );
-  return result;
-} // end set_difference()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename StrictWeakOrdering>
-  OutputIterator set_intersection(thrust::execution_policy<DerivedPolicy> &exec,
-                                  InputIterator1                           first1,
-                                  InputIterator1                           last1,
-                                  InputIterator2                           first2,
-                                  InputIterator2                           last2,
-                                  OutputIterator                           result,
-                                  StrictWeakOrdering                       comp)
-{
-  // unimplemented primitive
-  THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation<InputIterator1, false>::value) );
-  return result;
-} // end set_intersection()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename StrictWeakOrdering>
-  OutputIterator set_symmetric_difference(thrust::execution_policy<DerivedPolicy> &exec,
-                                          InputIterator1                           first1,
-                                          InputIterator1                           last1,
-                                          InputIterator2                           first2,
-                                          InputIterator2                           last2,
-                                          OutputIterator                           result,
-                                          StrictWeakOrdering                       comp)
-{
-  // unimplemented primitive
-  THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation<InputIterator1, false>::value) );
-  return result;
-} // end set_symmetric_difference()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename StrictWeakOrdering>
-  OutputIterator set_union(thrust::execution_policy<DerivedPolicy> &exec,
-                           InputIterator1                           first1,
-                           InputIterator1                           last1,
-                           InputIterator2                           first2,
-                           InputIterator2                           last2,
-                           OutputIterator                           result,
-                           StrictWeakOrdering                       comp)
-{
-  // unimplemented primitive
-  THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation<InputIterator1, false>::value) );
-  return result;
-} // end set_union()
-
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/generic/sort.h b/compat/thrust/system/detail/generic/sort.h
deleted file mode 100644
index 5498708a3b..0000000000
--- a/compat/thrust/system/detail/generic/sort.h
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/tag.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator>
-  void sort(thrust::execution_policy<DerivedPolicy> &exec,
-            RandomAccessIterator first,
-            RandomAccessIterator last);
-
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator,
-         typename StrictWeakOrdering>
-  void sort(thrust::execution_policy<DerivedPolicy> &exec,
-            RandomAccessIterator first,
-            RandomAccessIterator last,
-            StrictWeakOrdering comp);
-
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2>
-  void sort_by_key(thrust::execution_policy<DerivedPolicy> &exec,
-                   RandomAccessIterator1 keys_first,
-                   RandomAccessIterator1 keys_last,
-                   RandomAccessIterator2 values_first);
-
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename StrictWeakOrdering>
-  void sort_by_key(thrust::execution_policy<DerivedPolicy> &exec,
-                   RandomAccessIterator1 keys_first,
-                   RandomAccessIterator1 keys_last,
-                   RandomAccessIterator2 values_first,
-                   StrictWeakOrdering comp);
-
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator>
-  void stable_sort(thrust::execution_policy<DerivedPolicy> &exec,
-                   RandomAccessIterator first,
-                   RandomAccessIterator last);
-
-
-// XXX it is an error to call this function; it has no implementation
-template<typename DerivedPolicy,
-         typename RandomAccessIterator,
-         typename StrictWeakOrdering>
-  void stable_sort(thrust::execution_policy<DerivedPolicy> &exec,
-                   RandomAccessIterator first,
-                   RandomAccessIterator last,
-                   StrictWeakOrdering comp);
-
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2>
-  void stable_sort_by_key(thrust::execution_policy<DerivedPolicy> &exec,
-                          RandomAccessIterator1 keys_first,
-                          RandomAccessIterator1 keys_last,
-                          RandomAccessIterator2 values_first);
-
-
-// XXX it is an error to call this function; it has no implementation
-template<typename DerivedPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename StrictWeakOrdering>
-  void stable_sort_by_key(thrust::execution_policy<DerivedPolicy> &exec,
-                          RandomAccessIterator1 keys_first,
-                          RandomAccessIterator1 keys_last,
-                          RandomAccessIterator2 values_first,
-                          StrictWeakOrdering comp);
-
-
-template<typename DerivedPolicy, typename ForwardIterator>
-  bool is_sorted(thrust::execution_policy<DerivedPolicy> &exec,
-                 ForwardIterator first,
-                 ForwardIterator last);
-
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename Compare>
-  bool is_sorted(thrust::execution_policy<DerivedPolicy> &exec,
-                 ForwardIterator first,
-                 ForwardIterator last,
-                 Compare comp);
-
-
-template<typename DerivedPolicy, typename ForwardIterator>
-  ForwardIterator is_sorted_until(thrust::execution_policy<DerivedPolicy> &exec,
-                                  ForwardIterator first,
-                                  ForwardIterator last);
-
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename Compare>
-  ForwardIterator is_sorted_until(thrust::execution_policy<DerivedPolicy> &exec,
-                                  ForwardIterator first,
-                                  ForwardIterator last,
-                                  Compare comp);
-
-
-} // end generic
-} // end detail
-} // end system
-} // end thrust
-
-#include <thrust/system/detail/generic/sort.inl>
-
diff --git a/compat/thrust/system/detail/generic/sort.inl b/compat/thrust/system/detail/generic/sort.inl
deleted file mode 100644
index aabb2eed81..0000000000
--- a/compat/thrust/system/detail/generic/sort.inl
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/sort.h>
-#include <thrust/functional.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/distance.h>
-#include <thrust/functional.h>
-#include <thrust/find.h>
-#include <thrust/iterator/zip_iterator.h>
-#include <thrust/tuple.h>
-#include <thrust/detail/internal_functional.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-
-template<typename ExecutionPolicy,
-         typename RandomAccessIterator>
-  void sort(thrust::execution_policy<ExecutionPolicy> &exec,
-            RandomAccessIterator first,
-            RandomAccessIterator last)
-{
-  typedef typename thrust::iterator_value<RandomAccessIterator>::type value_type; 
-  thrust::sort(exec, first, last, thrust::less<value_type>());
-} // end sort()
-
-
-template<typename ExecutionPolicy,
-         typename RandomAccessIterator,
-         typename StrictWeakOrdering>
-  void sort(thrust::execution_policy<ExecutionPolicy> &exec,
-            RandomAccessIterator first,
-            RandomAccessIterator last,
-            StrictWeakOrdering comp)
-{
-  // implement with stable_sort
-  thrust::stable_sort(exec, first, last, comp);
-} // end sort()
-
-
-template<typename ExecutionPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2>
-  void sort_by_key(thrust::execution_policy<ExecutionPolicy> &exec,
-                   RandomAccessIterator1 keys_first,
-                   RandomAccessIterator1 keys_last,
-                   RandomAccessIterator2 values_first)
-{
-  typedef typename thrust::iterator_value<RandomAccessIterator1>::type value_type;
-  thrust::sort_by_key(exec, keys_first, keys_last, values_first, thrust::less<value_type>());
-} // end sort_by_key()
-
-
-template<typename ExecutionPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename StrictWeakOrdering>
-  void sort_by_key(thrust::execution_policy<ExecutionPolicy> &exec,
-                   RandomAccessIterator1 keys_first,
-                   RandomAccessIterator1 keys_last,
-                   RandomAccessIterator2 values_first,
-                   StrictWeakOrdering comp)
-{
-  // implement with stable_sort_by_key
-  thrust::stable_sort_by_key(exec, keys_first, keys_last, values_first, comp);
-} // end sort_by_key()
-
-
-template<typename ExecutionPolicy,
-         typename RandomAccessIterator>
-  void stable_sort(thrust::execution_policy<ExecutionPolicy> &exec,
-                   RandomAccessIterator first,
-                   RandomAccessIterator last)
-{
-  typedef typename thrust::iterator_value<RandomAccessIterator>::type value_type;
-  thrust::stable_sort(exec, first, last, thrust::less<value_type>());
-} // end stable_sort()
-
-
-template<typename ExecutionPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2>
-  void stable_sort_by_key(thrust::execution_policy<ExecutionPolicy> &exec,
-                          RandomAccessIterator1 keys_first,
-                          RandomAccessIterator1 keys_last,
-                          RandomAccessIterator2 values_first)
-{
-  typedef typename iterator_value<RandomAccessIterator1>::type value_type;
-  thrust::stable_sort_by_key(exec, keys_first, keys_last, values_first, thrust::less<value_type>());
-} // end stable_sort_by_key()
-
-
-template<typename ExecutionPolicy, typename ForwardIterator>
-  bool is_sorted(thrust::execution_policy<ExecutionPolicy> &exec,
-                 ForwardIterator first,
-                 ForwardIterator last)
-{
-  return thrust::is_sorted_until(exec, first, last) == last;
-} // end is_sorted()
-
-
-template<typename ExecutionPolicy,
-         typename ForwardIterator,
-         typename Compare>
-  bool is_sorted(thrust::execution_policy<ExecutionPolicy> &exec,
-                 ForwardIterator first,
-                 ForwardIterator last,
-                 Compare comp)
-{
-  return thrust::is_sorted_until(exec, first, last, comp) == last;
-} // end is_sorted()
-
-
-template<typename ExecutionPolicy, typename ForwardIterator>
-  ForwardIterator is_sorted_until(thrust::execution_policy<ExecutionPolicy> &exec,
-                                  ForwardIterator first,
-                                  ForwardIterator last)
-{
-  typedef typename thrust::iterator_value<ForwardIterator>::type InputType;
-
-  return thrust::is_sorted_until(exec, first, last, thrust::less<InputType>());
-} // end is_sorted_until()
-
-
-template<typename ExecutionPolicy,
-         typename ForwardIterator,
-         typename Compare>
-  ForwardIterator is_sorted_until(thrust::execution_policy<ExecutionPolicy> &exec,
-                                  ForwardIterator first,
-                                  ForwardIterator last,
-                                  Compare comp)
-{
-  if(thrust::distance(first,last) < 2) return last;
-
-  typedef thrust::tuple<ForwardIterator,ForwardIterator> IteratorTuple;
-  typedef thrust::zip_iterator<IteratorTuple>            ZipIterator;
-
-  ForwardIterator first_plus_one = first;
-  thrust::advance(first_plus_one, 1);
-
-  ZipIterator zipped_first = thrust::make_zip_iterator(thrust::make_tuple(first_plus_one, first));
-  ZipIterator zipped_last  = thrust::make_zip_iterator(thrust::make_tuple(last, first));
-
-  return thrust::get<0>(thrust::find_if(exec, zipped_first, zipped_last, thrust::detail::tuple_binary_predicate<Compare>(comp)).get_iterator_tuple());
-} // end is_sorted_until()
-
-
-template<typename RandomAccessIterator,
-         typename StrictWeakOrdering>
-  void stable_sort(tag,
-                   RandomAccessIterator first,
-                   RandomAccessIterator last,
-                   StrictWeakOrdering comp)
-{
-  // unimplemented primitive
-  THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation<RandomAccessIterator, false>::value) );
-} // end stable_sort()
-
-
-template<typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename StrictWeakOrdering>
-  void stable_sort_by_key(tag,
-                          RandomAccessIterator1 keys_first,
-                          RandomAccessIterator1 keys_last,
-                          RandomAccessIterator2 values_first,
-                          StrictWeakOrdering comp)
-{
-  // unimplemented primitive
-  THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation<RandomAccessIterator1, false>::value) );
-} // end stable_sort_by_key()
-
-
-} // end generic
-} // end detail
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/detail/generic/swap_ranges.h b/compat/thrust/system/detail/generic/swap_ranges.h
deleted file mode 100644
index 5d640d3feb..0000000000
--- a/compat/thrust/system/detail/generic/swap_ranges.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/tag.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-template<typename DerivedPolicy,
-         typename ForwardIterator1,
-         typename ForwardIterator2>
-  ForwardIterator2 swap_ranges(thrust::execution_policy<DerivedPolicy> &exec,
-                               ForwardIterator1 first1,
-                               ForwardIterator1 last1,
-                               ForwardIterator2 first2);
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/detail/generic/swap_ranges.inl>
-
diff --git a/compat/thrust/system/detail/generic/swap_ranges.inl b/compat/thrust/system/detail/generic/swap_ranges.inl
deleted file mode 100644
index 0e12d07627..0000000000
--- a/compat/thrust/system/detail/generic/swap_ranges.inl
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/swap_ranges.h>
-#include <thrust/tuple.h>
-#include <thrust/iterator/zip_iterator.h>
-#include <thrust/detail/internal_functional.h>
-#include <thrust/for_each.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-namespace detail
-{
-
-// XXX define this here rather than in internal_functional.h
-// to avoid circular dependence between swap.h & internal_functional.h
-struct swap_pair_elements
-{
-  template <typename Tuple>
-  __host__ __device__
-  void operator()(Tuple t)
-  {
-    // use unqualified swap to allow ADL to catch any user-defined swap
-    using thrust::swap;
-    swap(thrust::get<0>(t), thrust::get<1>(t));
-  }
-}; // end swap_pair_elements
-
-} // end detail
-
-template<typename DerivedPolicy,
-         typename ForwardIterator1,
-         typename ForwardIterator2>
-  ForwardIterator2 swap_ranges(thrust::execution_policy<DerivedPolicy> &exec,
-                               ForwardIterator1 first1,
-                               ForwardIterator1 last1,
-                               ForwardIterator2 first2)
-{
-  typedef thrust::tuple<ForwardIterator1,ForwardIterator2> IteratorTuple;
-  typedef thrust::zip_iterator<IteratorTuple>              ZipIterator;
-
-  ZipIterator result = thrust::for_each(exec,
-                                        thrust::make_zip_iterator(thrust::make_tuple(first1, first2)),
-                                        thrust::make_zip_iterator(thrust::make_tuple(last1,  first2)),
-                                        detail::swap_pair_elements());
-  return thrust::get<1>(result.get_iterator_tuple());
-} // end swap_ranges()
-
-} // end generic
-} // end detail
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/detail/generic/tabulate.h b/compat/thrust/system/detail/generic/tabulate.h
deleted file mode 100644
index e5911b14ec..0000000000
--- a/compat/thrust/system/detail/generic/tabulate.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/tag.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename UnaryOperation>
-  void tabulate(thrust::execution_policy<DerivedPolicy> &exec,
-                ForwardIterator first,
-                ForwardIterator last,
-                UnaryOperation unary_op);
-
-template<typename DerivedPolicy,
-         typename OutputIterator,
-         typename Size,
-         typename UnaryOperation>
-  OutputIterator tabulate_n(thrust::execution_policy<DerivedPolicy> &exec,
-                            OutputIterator first,
-                            Size n,
-                            UnaryOperation unary_op);
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/detail/generic/tabulate.inl>
-
diff --git a/compat/thrust/system/detail/generic/tabulate.inl b/compat/thrust/system/detail/generic/tabulate.inl
deleted file mode 100644
index d2ffc26df6..0000000000
--- a/compat/thrust/system/detail/generic/tabulate.inl
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/tabulate.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/transform.h>
-#include <thrust/distance.h>
-#include <thrust/iterator/counting_iterator.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename UnaryOperation>
-  void tabulate(thrust::execution_policy<DerivedPolicy> &exec,
-                ForwardIterator first,
-                ForwardIterator last,
-                UnaryOperation unary_op)
-{
-  typedef typename iterator_difference<ForwardIterator>::type difference_type;
-
-  // by default, counting_iterator uses a 64b difference_type on 32b platforms to avoid overflowing its counter.
-  // this causes problems when a zip_iterator is created in transform's implementation -- ForwardIterator is
-  // incremented by a 64b difference_type and some compilers warn
-  // to avoid this, specify the counting_iterator's difference_type to be the same as ForwardIterator's.
-  thrust::counting_iterator<difference_type, thrust::use_default, thrust::use_default, difference_type> iter(0);
-
-  thrust::transform(exec, iter, iter + thrust::distance(first, last), first, unary_op);
-} // end tabulate()
-
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
-
diff --git a/compat/thrust/system/detail/generic/tag.h b/compat/thrust/system/detail/generic/tag.h
deleted file mode 100644
index 577d6a37a7..0000000000
--- a/compat/thrust/system/detail/generic/tag.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file generic/tag.h
- *  \brief Implementation of the generic backend's tag.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-// tag exists only to make the generic entry points the least priority match
-// during ADL. tag should not be derived from and is constructible from anything
-struct tag
-{
-  template<typename T>
-  __host__ __device__ inline
-  tag(const T &) {}
-};
-
-} // end generic
-} // end detail
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/detail/generic/temporary_buffer.h b/compat/thrust/system/detail/generic/temporary_buffer.h
deleted file mode 100644
index 8cb08b06ae..0000000000
--- a/compat/thrust/system/detail/generic/temporary_buffer.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/tag.h>
-#include <thrust/pair.h>
-#include <thrust/detail/pointer.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-
-template<typename T, typename DerivedPolicy>
-  thrust::pair<thrust::pointer<T,DerivedPolicy>, typename thrust::pointer<T,DerivedPolicy>::difference_type>
-    get_temporary_buffer(thrust::execution_policy<DerivedPolicy> &exec, typename thrust::pointer<T,DerivedPolicy>::difference_type n);
-
-
-template<typename DerivedPolicy, typename Pointer>
-  void return_temporary_buffer(thrust::execution_policy<DerivedPolicy> &exec, Pointer p);
-
-
-} // end generic
-} // end detail
-} // end system
-} // end thrust
-
-#include <thrust/system/detail/generic/temporary_buffer.inl>
-
diff --git a/compat/thrust/system/detail/generic/temporary_buffer.inl b/compat/thrust/system/detail/generic/temporary_buffer.inl
deleted file mode 100644
index 0a6be7ee08..0000000000
--- a/compat/thrust/system/detail/generic/temporary_buffer.inl
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/temporary_buffer.h>
-#include <thrust/detail/pointer.h>
-#include <thrust/detail/malloc_and_free.h>
-#include <thrust/pair.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-
-template<typename T, typename DerivedPolicy>
-  thrust::pair<thrust::pointer<T,DerivedPolicy>, typename thrust::pointer<T,DerivedPolicy>::difference_type>
-    get_temporary_buffer(thrust::execution_policy<DerivedPolicy> &exec, typename thrust::pointer<T,DerivedPolicy>::difference_type n)
-{
-  thrust::pointer<T,DerivedPolicy> ptr = thrust::malloc<T>(exec, n);
-
-  // check for a failed malloc
-  if(!ptr.get())
-  {
-    n = 0;
-  } // end if
-
-  return thrust::make_pair(ptr, n);
-} // end get_temporary_buffer()
-
-
-template<typename DerivedPolicy, typename Pointer>
-  void return_temporary_buffer(thrust::execution_policy<DerivedPolicy> &exec, Pointer p)
-{
-  thrust::free(exec, p);
-} // end return_temporary_buffer()
-
-
-} // end generic
-} // end detail
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/detail/generic/transform.h b/compat/thrust/system/detail/generic/transform.h
deleted file mode 100644
index e98d40291e..0000000000
--- a/compat/thrust/system/detail/generic/transform.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/tag.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename UnaryFunction>
-  OutputIterator transform(thrust::execution_policy<DerivedPolicy> &exec,
-                           InputIterator first,
-                           InputIterator last,
-                           OutputIterator result,
-                           UnaryFunction op);
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename BinaryFunction>
-  OutputIterator transform(thrust::execution_policy<DerivedPolicy> &exec,
-                           InputIterator1 first1,
-                           InputIterator1 last1,
-                           InputIterator2 first2,
-                           OutputIterator result,
-                           BinaryFunction op);
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename ForwardIterator,
-         typename UnaryFunction,
-         typename Predicate>
-  ForwardIterator transform_if(thrust::execution_policy<DerivedPolicy> &exec,
-                               InputIterator first,
-                               InputIterator last,
-                               ForwardIterator result,
-                               UnaryFunction unary_op,
-                               Predicate pred);
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename ForwardIterator,
-         typename UnaryFunction,
-         typename Predicate>
-  ForwardIterator transform_if(thrust::execution_policy<DerivedPolicy> &exec,
-                               InputIterator1 first,
-                               InputIterator1 last,
-                               InputIterator2 stencil,
-                               ForwardIterator result,
-                               UnaryFunction unary_op,
-                               Predicate pred);
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename ForwardIterator,
-         typename BinaryFunction,
-         typename Predicate>
-  ForwardIterator transform_if(thrust::execution_policy<DerivedPolicy> &exec,
-                               InputIterator1 first1,
-                               InputIterator1 last1,
-                               InputIterator2 first2,
-                               InputIterator3 stencil,
-                               ForwardIterator result,
-                               BinaryFunction binary_op,
-                               Predicate pred);
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/detail/generic/transform.inl>
-
diff --git a/compat/thrust/system/detail/generic/transform.inl b/compat/thrust/system/detail/generic/transform.inl
deleted file mode 100644
index 8f0995328c..0000000000
--- a/compat/thrust/system/detail/generic/transform.inl
+++ /dev/null
@@ -1,214 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/transform.h>
-#include <thrust/for_each.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/iterator/detail/minimum_system.h>
-#include <thrust/tuple.h>
-#include <thrust/iterator/zip_iterator.h>
-#include <thrust/detail/internal_functional.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename UnaryFunction>
-  OutputIterator transform(thrust::execution_policy<DerivedPolicy> &exec,
-                           InputIterator first,
-                           InputIterator last,
-                           OutputIterator result,
-                           UnaryFunction op)
-{
-  // XXX WAR the problem of a generic __host__ __device__ functor's inability to invoke
-  //     a function which is only __host__ or __device__ by selecting a generic functor
-  //     which is one or the other
-  //     when nvcc is able to deal with this, remove this WAR
-  
-  // given the minimal system, determine the unary transform functor we need
-  typedef typename thrust::detail::unary_transform_functor<DerivedPolicy,UnaryFunction>::type UnaryTransformFunctor;
-
-  // make an iterator tuple
-  typedef thrust::tuple<InputIterator,OutputIterator> IteratorTuple;
-  typedef thrust::zip_iterator<IteratorTuple> ZipIterator;
-
-  ZipIterator zipped_result =
-    thrust::for_each(exec,
-                     thrust::make_zip_iterator(thrust::make_tuple(first,result)),
-                     thrust::make_zip_iterator(thrust::make_tuple(last,result)),
-                     UnaryTransformFunctor(op));
-
-  return thrust::get<1>(zipped_result.get_iterator_tuple());
-} // end transform()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename BinaryFunction>
-  OutputIterator transform(thrust::execution_policy<DerivedPolicy> &exec,
-                           InputIterator1 first1,
-                           InputIterator1 last1,
-                           InputIterator2 first2,
-                           OutputIterator result,
-                           BinaryFunction op)
-{
-  // XXX WAR the problem of a generic __host__ __device__ functor's inability to invoke
-  //     a function which is only __host__ or __device__ by selecting a generic functor
-  //     which is one or the other
-  //     when nvcc is able to deal with this, remove this WAR
-  
-  // given the minimal system, determine the binary transform functor we need
-  typedef typename thrust::detail::binary_transform_functor<DerivedPolicy,BinaryFunction>::type BinaryTransformFunctor;
-
-  // make an iterator tuple
-  typedef thrust::tuple<InputIterator1,InputIterator2,OutputIterator> IteratorTuple;
-  typedef thrust::zip_iterator<IteratorTuple> ZipIterator;
-
-  ZipIterator zipped_result =
-    thrust::for_each(exec,
-                     thrust::make_zip_iterator(thrust::make_tuple(first1,first2,result)),
-                     thrust::make_zip_iterator(thrust::make_tuple(last1,first2,result)),
-                     BinaryTransformFunctor(op));
-
-  return thrust::get<2>(zipped_result.get_iterator_tuple());
-} // end transform()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename ForwardIterator,
-         typename UnaryFunction,
-         typename Predicate>
-  ForwardIterator transform_if(thrust::execution_policy<DerivedPolicy> &exec,
-                               InputIterator first,
-                               InputIterator last,
-                               ForwardIterator result,
-                               UnaryFunction unary_op,
-                               Predicate pred)
-{
-  // XXX WAR the problem of a generic __host__ __device__ functor's inability to invoke
-  //     a function which is only __host__ or __device__ by selecting a generic functor
-  //     which is one or the other
-  //     when nvcc is able to deal with this, remove this WAR
-  
-  // given the minimal system, determine the unary transform_if functor we need
-  typedef typename thrust::detail::unary_transform_if_functor<DerivedPolicy,UnaryFunction,Predicate>::type UnaryTransformIfFunctor;
-
-  // make an iterator tuple
-  typedef thrust::tuple<InputIterator,ForwardIterator> IteratorTuple;
-  typedef thrust::zip_iterator<IteratorTuple> ZipIterator;
-
-  ZipIterator zipped_result =
-    thrust::for_each(exec,
-                     thrust::make_zip_iterator(thrust::make_tuple(first,result)),
-                     thrust::make_zip_iterator(thrust::make_tuple(last,result)),
-                     UnaryTransformIfFunctor(unary_op,pred));
-
-  return thrust::get<1>(zipped_result.get_iterator_tuple());
-} // end transform_if()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename ForwardIterator,
-         typename UnaryFunction,
-         typename Predicate>
-  ForwardIterator transform_if(thrust::execution_policy<DerivedPolicy> &exec,
-                               InputIterator1 first,
-                               InputIterator1 last,
-                               InputIterator2 stencil,
-                               ForwardIterator result,
-                               UnaryFunction unary_op,
-                               Predicate pred)
-{
-  // XXX WAR the problem of a generic __host__ __device__ functor's inability to invoke
-  //     a function which is only __host__ or __device__ by selecting a generic functor
-  //     which is one or the other
-  //     when nvcc is able to deal with this, remove this WAR
-  
-  // given the minimal system, determine the unary transform_if functor we need
-  typedef typename thrust::detail::unary_transform_if_with_stencil_functor<DerivedPolicy,UnaryFunction,Predicate>::type UnaryTransformIfFunctor;
-
-  // make an iterator tuple
-  typedef thrust::tuple<InputIterator1,InputIterator2,ForwardIterator> IteratorTuple;
-  typedef thrust::zip_iterator<IteratorTuple> ZipIterator;
-
-  ZipIterator zipped_result =
-    thrust::for_each(exec,
-                     thrust::make_zip_iterator(thrust::make_tuple(first,stencil,result)),
-                     thrust::make_zip_iterator(thrust::make_tuple(last,stencil,result)),
-                     UnaryTransformIfFunctor(unary_op,pred));
-
-  return thrust::get<2>(zipped_result.get_iterator_tuple());
-} // end transform_if()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename ForwardIterator,
-         typename BinaryFunction,
-         typename Predicate>
-  ForwardIterator transform_if(thrust::execution_policy<DerivedPolicy> &exec,
-                               InputIterator1 first1,
-                               InputIterator1 last1,
-                               InputIterator2 first2,
-                               InputIterator3 stencil,
-                               ForwardIterator result,
-                               BinaryFunction binary_op,
-                               Predicate pred)
-{
-  // XXX WAR the problem of a generic __host__ __device__ functor's inability to invoke
-  //     a function which is only __host__ or __device__ by selecting a generic functor
-  //     which is one or the other
-  //     when nvcc is able to deal with this, remove this WAR
-  
-  // given the minimal system, determine the binary transform_if functor we need
-  typedef typename thrust::detail::binary_transform_if_functor<DerivedPolicy,BinaryFunction,Predicate>::type BinaryTransformIfFunctor;
-
-  // make an iterator tuple
-  typedef thrust::tuple<InputIterator1,InputIterator2,InputIterator3,ForwardIterator> IteratorTuple;
-  typedef thrust::zip_iterator<IteratorTuple> ZipIterator;
-
-  ZipIterator zipped_result =
-    thrust::for_each(exec,
-                     thrust::make_zip_iterator(thrust::make_tuple(first1,first2,stencil,result)),
-                     thrust::make_zip_iterator(thrust::make_tuple(last1,first2,stencil,result)),
-                     BinaryTransformIfFunctor(binary_op,pred));
-
-  return thrust::get<3>(zipped_result.get_iterator_tuple());
-} // end transform_if()
-
-
-} // end generic
-} // end detail
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/detail/generic/transform_reduce.h b/compat/thrust/system/detail/generic/transform_reduce.h
deleted file mode 100644
index c1f098f50f..0000000000
--- a/compat/thrust/system/detail/generic/transform_reduce.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/tag.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-template<typename ExecutionPolicy,
-         typename InputIterator, 
-         typename UnaryFunction, 
-         typename OutputType,
-         typename BinaryFunction>
-  OutputType transform_reduce(thrust::execution_policy<ExecutionPolicy> &exec,
-                              InputIterator first,
-                              InputIterator last,
-                              UnaryFunction unary_op,
-                              OutputType init,
-                              BinaryFunction binary_op);
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/detail/generic/transform_reduce.inl>
-
diff --git a/compat/thrust/system/detail/generic/transform_reduce.inl b/compat/thrust/system/detail/generic/transform_reduce.inl
deleted file mode 100644
index ce8b6a1213..0000000000
--- a/compat/thrust/system/detail/generic/transform_reduce.inl
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/transform_reduce.h>
-#include <thrust/reduce.h>
-#include <thrust/iterator/transform_iterator.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-template<typename DerivedPolicy,
-         typename InputIterator, 
-         typename UnaryFunction, 
-         typename OutputType,
-         typename BinaryFunction>
-  OutputType transform_reduce(thrust::execution_policy<DerivedPolicy> &exec,
-                              InputIterator first,
-                              InputIterator last,
-                              UnaryFunction unary_op,
-                              OutputType init,
-                              BinaryFunction binary_op)
-{
-  thrust::transform_iterator<UnaryFunction, InputIterator, OutputType> xfrm_first(first, unary_op);
-  thrust::transform_iterator<UnaryFunction, InputIterator, OutputType> xfrm_last(last, unary_op);
-
-  return thrust::reduce(exec, xfrm_first, xfrm_last, init, binary_op);
-} // end transform_reduce()
-
-} // end generic
-} // end detail
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/detail/generic/transform_scan.h b/compat/thrust/system/detail/generic/transform_scan.h
deleted file mode 100644
index 99db86e4d9..0000000000
--- a/compat/thrust/system/detail/generic/transform_scan.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/tag.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-template<typename ExecutionPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename UnaryFunction,
-         typename BinaryFunction>
-  OutputIterator transform_inclusive_scan(thrust::execution_policy<ExecutionPolicy> &exec,
-                                          InputIterator first,
-                                          InputIterator last,
-                                          OutputIterator result,
-                                          UnaryFunction unary_op,
-                                          BinaryFunction binary_op);
-
-template<typename ExecutionPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename UnaryFunction,
-         typename T,
-         typename AssociativeOperator>
-  OutputIterator transform_exclusive_scan(thrust::execution_policy<ExecutionPolicy> &exec,
-                                          InputIterator first,
-                                          InputIterator last,
-                                          OutputIterator result,
-                                          UnaryFunction unary_op,
-                                          T init,
-                                          AssociativeOperator binary_op);
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/detail/generic/transform_scan.inl>
-
diff --git a/compat/thrust/system/detail/generic/transform_scan.inl b/compat/thrust/system/detail/generic/transform_scan.inl
deleted file mode 100644
index a95ec20e67..0000000000
--- a/compat/thrust/system/detail/generic/transform_scan.inl
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/transform_scan.h>
-#include <thrust/scan.h>
-#include <thrust/iterator/transform_iterator.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/detail/type_traits/function_traits.h>
-#include <thrust/detail/type_traits/iterator/is_output_iterator.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-template<typename ExecutionPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename UnaryFunction,
-         typename BinaryFunction>
-  OutputIterator transform_inclusive_scan(thrust::execution_policy<ExecutionPolicy> &exec,
-                                          InputIterator first,
-                                          InputIterator last,
-                                          OutputIterator result,
-                                          UnaryFunction unary_op,
-                                          BinaryFunction binary_op)
-{
-  // the pseudocode for deducing the type of the temporary used below:
-  // 
-  // if UnaryFunction is AdaptableUnaryFunction
-  //   TemporaryType = AdaptableUnaryFunction::result_type
-  // else if OutputIterator is a "pure" output iterator
-  //   TemporaryType = InputIterator::value_type
-  // else
-  //   TemporaryType = OutputIterator::value_type
-  //
-  // XXX upon c++0x, TemporaryType needs to be:
-  // result_of<UnaryFunction>::type
-
-  typedef typename thrust::detail::eval_if<
-    thrust::detail::has_result_type<UnaryFunction>::value,
-    thrust::detail::result_type<UnaryFunction>,
-    thrust::detail::eval_if<
-      thrust::detail::is_output_iterator<OutputIterator>::value,
-      thrust::iterator_value<InputIterator>,
-      thrust::iterator_value<OutputIterator>
-    >
-  >::type ValueType;
-
-  thrust::transform_iterator<UnaryFunction, InputIterator, ValueType> _first(first, unary_op);
-  thrust::transform_iterator<UnaryFunction, InputIterator, ValueType> _last(last, unary_op);
-
-  return thrust::inclusive_scan(exec, _first, _last, result, binary_op);
-} // end transform_inclusive_scan()
-
-template<typename ExecutionPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename UnaryFunction,
-         typename T,
-         typename AssociativeOperator>
-  OutputIterator transform_exclusive_scan(thrust::execution_policy<ExecutionPolicy> &exec,
-                                          InputIterator first,
-                                          InputIterator last,
-                                          OutputIterator result,
-                                          UnaryFunction unary_op,
-                                          T init,
-                                          AssociativeOperator binary_op)
-{
-  // the pseudocode for deducing the type of the temporary used below:
-  // 
-  // if UnaryFunction is AdaptableUnaryFunction
-  //   TemporaryType = AdaptableUnaryFunction::result_type
-  // else if OutputIterator is a "pure" output iterator
-  //   TemporaryType = InputIterator::value_type
-  // else
-  //   TemporaryType = OutputIterator::value_type
-  //
-  // XXX upon c++0x, TemporaryType needs to be:
-  // result_of<UnaryFunction>::type
-
-  typedef typename thrust::detail::eval_if<
-    thrust::detail::has_result_type<UnaryFunction>::value,
-    thrust::detail::result_type<UnaryFunction>,
-    thrust::detail::eval_if<
-      thrust::detail::is_output_iterator<OutputIterator>::value,
-      thrust::iterator_value<InputIterator>,
-      thrust::iterator_value<OutputIterator>
-    >
-  >::type ValueType;
-
-  thrust::transform_iterator<UnaryFunction, InputIterator, ValueType> _first(first, unary_op);
-  thrust::transform_iterator<UnaryFunction, InputIterator, ValueType> _last(last, unary_op);
-
-  return thrust::exclusive_scan(exec, _first, _last, result, init, binary_op);
-} // end transform_exclusive_scan()
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
-
diff --git a/compat/thrust/system/detail/generic/type_traits.h b/compat/thrust/system/detail/generic/type_traits.h
deleted file mode 100644
index 40113525f3..0000000000
--- a/compat/thrust/system/detail/generic/type_traits.h
+++ /dev/null
@@ -1,168 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file generic/type_traits.h
- *  \brief Introspection for free functions defined in generic.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-namespace thrust
-{
-
-// forward declaration of any_system_tag for any_conversion below
-struct any_system_tag;
-
-namespace system
-{
-namespace detail
-{
-
-// we must define these traits outside of generic's namespace
-namespace generic_type_traits_ns
-{
-
-typedef char yes;
-typedef char (&no)[2];
-
-struct any_conversion
-{
-  template<typename T> any_conversion(const T &);
-
-  // add this extra constructor to disambiguate conversion from any_system_tag
-  any_conversion(const any_system_tag &);
-};
-
-namespace select_system_exists_ns
-{
-  no select_system(const any_conversion &);
-  no select_system(const any_conversion &, const any_conversion &);
-  no select_system(const any_conversion &, const any_conversion &, const any_conversion &);
-  no select_system(const any_conversion &, const any_conversion &, const any_conversion &, const any_conversion &);
-  no select_system(const any_conversion &, const any_conversion &, const any_conversion &, const any_conversion &, const any_conversion &);
-  no select_system(const any_conversion &, const any_conversion &, const any_conversion &, const any_conversion &, const any_conversion &, const any_conversion &);
-
-  template<typename T> yes check(const T &);
-
-  no check(no);
-
-  template<typename Tag>
-    struct select_system1_exists
-  {
-    static Tag &tag;
-
-    static const bool value = sizeof(check(select_system(tag))) == sizeof(yes);
-  };
-
-  template<typename Tag1, typename Tag2>
-    struct select_system2_exists
-  {
-    static Tag1 &tag1;
-    static Tag2 &tag2;
-
-    static const bool value = sizeof(check(select_system(tag1,tag2))) == sizeof(yes);
-  };
-
-  template<typename Tag1, typename Tag2, typename Tag3>
-    struct select_system3_exists
-  {
-    static Tag1 &tag1;
-    static Tag2 &tag2;
-    static Tag3 &tag3;
-
-    static const bool value = sizeof(check(select_system(tag1,tag2,tag3))) == sizeof(yes);
-  };
-
-  template<typename Tag1, typename Tag2, typename Tag3, typename Tag4>
-    struct select_system4_exists
-  {
-    static Tag1 &tag1;
-    static Tag2 &tag2;
-    static Tag3 &tag3;
-    static Tag4 &tag4;
-
-    static const bool value = sizeof(check(select_system(tag1,tag2,tag3,tag4))) == sizeof(yes);
-  };
-
-  template<typename Tag1, typename Tag2, typename Tag3, typename Tag4, typename Tag5>
-    struct select_system5_exists
-  {
-    static Tag1 &tag1;
-    static Tag2 &tag2;
-    static Tag3 &tag3;
-    static Tag4 &tag4;
-    static Tag5 &tag5;
-
-    static const bool value = sizeof(check(select_system(tag1,tag2,tag3,tag4,tag5))) == sizeof(yes);
-  };
-
-  template<typename Tag1, typename Tag2, typename Tag3, typename Tag4, typename Tag5, typename Tag6>
-    struct select_system6_exists
-  {
-    static Tag1 &tag1;
-    static Tag2 &tag2;
-    static Tag3 &tag3;
-    static Tag4 &tag4;
-    static Tag5 &tag5;
-    static Tag6 &tag6;
-
-    static const bool value = sizeof(check(select_system(tag1,tag2,tag3,tag4,tag5,tag6))) == sizeof(yes);
-  };
-} // end select_system_exists_ns
-
-} // end generic_type_traits_ns
-
-namespace generic
-{
-
-template<typename Tag>
-  struct select_system1_exists
-    : generic_type_traits_ns::select_system_exists_ns::select_system1_exists<Tag>
-{};
-
-template<typename Tag1, typename Tag2>
-  struct select_system2_exists
-    : generic_type_traits_ns::select_system_exists_ns::select_system2_exists<Tag1,Tag2>
-{};
-
-template<typename Tag1, typename Tag2, typename Tag3>
-  struct select_system3_exists
-    : generic_type_traits_ns::select_system_exists_ns::select_system3_exists<Tag1,Tag2,Tag3>
-{};
-
-template<typename Tag1, typename Tag2, typename Tag3, typename Tag4>
-  struct select_system4_exists
-    : generic_type_traits_ns::select_system_exists_ns::select_system4_exists<Tag1,Tag2,Tag3,Tag4>
-{};
-
-template<typename Tag1, typename Tag2, typename Tag3, typename Tag4, typename Tag5>
-  struct select_system5_exists
-    : generic_type_traits_ns::select_system_exists_ns::select_system5_exists<Tag1,Tag2,Tag3,Tag4,Tag5>
-{};
-
-template<typename Tag1, typename Tag2, typename Tag3, typename Tag4, typename Tag5, typename Tag6>
-  struct select_system6_exists
-    : generic_type_traits_ns::select_system_exists_ns::select_system6_exists<Tag1,Tag2,Tag3,Tag4,Tag5,Tag6>
-{};
-
-} // end generic
-} // end detail
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/detail/generic/uninitialized_copy.h b/compat/thrust/system/detail/generic/uninitialized_copy.h
deleted file mode 100644
index 67e3e68328..0000000000
--- a/compat/thrust/system/detail/generic/uninitialized_copy.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/tag.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-template<typename ExecutionPolicy,
-         typename InputIterator,
-         typename ForwardIterator>
-  ForwardIterator uninitialized_copy(thrust::execution_policy<ExecutionPolicy> &exec,
-                                     InputIterator first,
-                                     InputIterator last,
-                                     ForwardIterator result);
-
-template<typename ExecutionPolicy,
-         typename InputIterator,
-         typename Size,
-         typename ForwardIterator>
-  ForwardIterator uninitialized_copy_n(thrust::execution_policy<ExecutionPolicy> &exec,
-                                       InputIterator first,
-                                       Size n,
-                                       ForwardIterator result);
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/detail/generic/uninitialized_copy.inl>
-
diff --git a/compat/thrust/system/detail/generic/uninitialized_copy.inl b/compat/thrust/system/detail/generic/uninitialized_copy.inl
deleted file mode 100644
index 414e6e48fe..0000000000
--- a/compat/thrust/system/detail/generic/uninitialized_copy.inl
+++ /dev/null
@@ -1,187 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/uninitialized_copy.h>
-#include <thrust/copy.h>
-#include <thrust/for_each.h>
-#include <thrust/detail/internal_functional.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/iterator/iterator_traits.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-namespace detail
-{
-
-template<typename InputType,
-         typename OutputType>
-  struct uninitialized_copy_functor
-{
-  template<typename Tuple>
-  __host__ __device__
-  void operator()(Tuple t)
-  {
-    const InputType &in = thrust::get<0>(t);
-    OutputType &out = thrust::get<1>(t);
-
-    ::new(static_cast<void*>(&out)) OutputType(in);
-  } // end operator()()
-}; // end uninitialized_copy_functor
-
-
-// non-trivial copy constructor path
-template<typename ExecutionPolicy,
-         typename InputIterator,
-         typename ForwardIterator>
-  ForwardIterator uninitialized_copy(thrust::execution_policy<ExecutionPolicy> &exec,
-                                     InputIterator first,
-                                     InputIterator last,
-                                     ForwardIterator result,
-                                     thrust::detail::false_type) // has_trivial_copy_constructor
-{
-  // zip up the iterators
-  typedef thrust::tuple<InputIterator,ForwardIterator> IteratorTuple;
-  typedef thrust::zip_iterator<IteratorTuple> ZipIterator;
-
-  ZipIterator begin = thrust::make_zip_iterator(thrust::make_tuple(first,result));
-  ZipIterator end = begin;
-
-  // get a zip_iterator pointing to the end
-  const typename thrust::iterator_difference<InputIterator>::type n = thrust::distance(first,last);
-  thrust::advance(end, n);
-
-  // create a functor
-  typedef typename iterator_traits<InputIterator>::value_type InputType;
-  typedef typename iterator_traits<ForwardIterator>::value_type OutputType;
-
-  detail::uninitialized_copy_functor<InputType, OutputType> f;
-
-  // do the for_each
-  thrust::for_each(exec, begin, end, f);
-
-  // return the end of the output range
-  return thrust::get<1>(end.get_iterator_tuple());
-} // end uninitialized_copy()
-
-
-// trivial copy constructor path
-template<typename ExecutionPolicy,
-         typename InputIterator,
-         typename ForwardIterator>
-  ForwardIterator uninitialized_copy(thrust::execution_policy<ExecutionPolicy> &exec,
-                                     InputIterator first,
-                                     InputIterator last,
-                                     ForwardIterator result,
-                                     thrust::detail::true_type) // has_trivial_copy_constructor
-{
-  return thrust::copy(exec, first, last, result);
-} // end uninitialized_copy()
-
-
-// non-trivial copy constructor path
-template<typename ExecutionPolicy,
-         typename InputIterator,
-         typename Size,
-         typename ForwardIterator>
-  ForwardIterator uninitialized_copy_n(thrust::execution_policy<ExecutionPolicy> &exec,
-                                       InputIterator first,
-                                       Size n,
-                                       ForwardIterator result,
-                                       thrust::detail::false_type) // has_trivial_copy_constructor
-{
-  // zip up the iterators
-  typedef thrust::tuple<InputIterator,ForwardIterator> IteratorTuple;
-  typedef thrust::zip_iterator<IteratorTuple> ZipIterator;
-
-  ZipIterator zipped_first = thrust::make_zip_iterator(thrust::make_tuple(first,result));
-
-  // create a functor
-  typedef typename iterator_traits<InputIterator>::value_type   InputType;
-  typedef typename iterator_traits<ForwardIterator>::value_type OutputType;
-
-  detail::uninitialized_copy_functor<InputType, OutputType> f;
-
-  // do the for_each_n
-  ZipIterator zipped_last = thrust::for_each_n(exec, zipped_first, n, f);
-
-  // return the end of the output range
-  return thrust::get<1>(zipped_last.get_iterator_tuple());
-} // end uninitialized_copy_n()
-
-
-// trivial copy constructor path
-template<typename ExecutionPolicy,
-         typename InputIterator,
-         typename Size,
-         typename ForwardIterator>
-  ForwardIterator uninitialized_copy_n(thrust::execution_policy<ExecutionPolicy> &exec,
-                                       InputIterator first,
-                                       Size n,
-                                       ForwardIterator result,
-                                       thrust::detail::true_type) // has_trivial_copy_constructor
-{
-  return thrust::copy_n(exec, first, n, result);
-} // end uninitialized_copy_n()
-
-
-} // end detail
-
-
-template<typename ExecutionPolicy,
-         typename InputIterator,
-         typename ForwardIterator>
-  ForwardIterator uninitialized_copy(thrust::execution_policy<ExecutionPolicy> &exec,
-                                     InputIterator first,
-                                     InputIterator last,
-                                     ForwardIterator result)
-{
-  typedef typename iterator_traits<ForwardIterator>::value_type ResultType;
-
-  typedef typename thrust::detail::has_trivial_copy_constructor<ResultType>::type ResultTypeHasTrivialCopyConstructor;
-
-  return thrust::system::detail::generic::detail::uninitialized_copy(exec, first, last, result, ResultTypeHasTrivialCopyConstructor());
-} // end uninitialized_copy()
-
-
-template<typename ExecutionPolicy,
-         typename InputIterator,
-         typename Size,
-         typename ForwardIterator>
-  ForwardIterator uninitialized_copy_n(thrust::execution_policy<ExecutionPolicy> &exec,
-                                       InputIterator first,
-                                       Size n,
-                                       ForwardIterator result)
-{
-  typedef typename iterator_traits<ForwardIterator>::value_type ResultType;
-
-  typedef typename thrust::detail::has_trivial_copy_constructor<ResultType>::type ResultTypeHasTrivialCopyConstructor;
-
-  return thrust::system::detail::generic::detail::uninitialized_copy_n(exec, first, n, result, ResultTypeHasTrivialCopyConstructor());
-} // end uninitialized_copy_n()
-
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/generic/uninitialized_fill.h b/compat/thrust/system/detail/generic/uninitialized_fill.h
deleted file mode 100644
index c1df694c02..0000000000
--- a/compat/thrust/system/detail/generic/uninitialized_fill.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/tag.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename T>
-  void uninitialized_fill(thrust::execution_policy<DerivedPolicy> &exec,
-                          ForwardIterator first,
-                          ForwardIterator last,
-                          const T &x);
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename Size,
-         typename T>
-  ForwardIterator uninitialized_fill_n(thrust::execution_policy<DerivedPolicy> &exec,
-                                       ForwardIterator first,
-                                       Size n,
-                                       const T &x);
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/detail/generic/uninitialized_fill.inl>
-
diff --git a/compat/thrust/system/detail/generic/uninitialized_fill.inl b/compat/thrust/system/detail/generic/uninitialized_fill.inl
deleted file mode 100644
index bb30b24fd7..0000000000
--- a/compat/thrust/system/detail/generic/uninitialized_fill.inl
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/uninitialized_fill.h>
-#include <thrust/fill.h>
-#include <thrust/detail/internal_functional.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/iterator/iterator_traits.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-namespace detail
-{
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename T>
-  void uninitialized_fill(thrust::execution_policy<DerivedPolicy> &exec,
-                          ForwardIterator first,
-                          ForwardIterator last,
-                          const T &x,
-                          thrust::detail::true_type) // has_trivial_copy_constructor
-{
-  thrust::fill(exec, first, last, x);
-} // end uninitialized_fill()
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename T>
-  void uninitialized_fill(thrust::execution_policy<DerivedPolicy> &exec,
-                          ForwardIterator first,
-                          ForwardIterator last,
-                          const T &x,
-                          thrust::detail::false_type) // has_trivial_copy_constructor
-{
-  typedef typename iterator_traits<ForwardIterator>::value_type ValueType;
-
-  thrust::for_each(exec, first, last, thrust::detail::uninitialized_fill_functor<ValueType>(x));
-} // end uninitialized_fill()
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename Size,
-         typename T>
-  ForwardIterator uninitialized_fill_n(thrust::execution_policy<DerivedPolicy> &exec,
-                                       ForwardIterator first,
-                                       Size n,
-                                       const T &x,
-                                       thrust::detail::true_type) // has_trivial_copy_constructor
-{
-  return thrust::fill_n(exec, first, n, x);
-} // end uninitialized_fill()
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename Size,
-         typename T>
-  ForwardIterator uninitialized_fill_n(thrust::execution_policy<DerivedPolicy> &exec,
-                                       ForwardIterator first,
-                                       Size n,
-                                       const T &x,
-                                       thrust::detail::false_type) // has_trivial_copy_constructor
-{
-  typedef typename iterator_traits<ForwardIterator>::value_type ValueType;
-
-  return thrust::for_each_n(exec, first, n, thrust::detail::uninitialized_fill_functor<ValueType>(x));
-} // end uninitialized_fill()
-
-} // end detail
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename T>
-  void uninitialized_fill(thrust::execution_policy<DerivedPolicy> &exec,
-                          ForwardIterator first,
-                          ForwardIterator last,
-                          const T &x)
-{
-  typedef typename iterator_traits<ForwardIterator>::value_type ValueType;
-
-  typedef thrust::detail::has_trivial_copy_constructor<ValueType> ValueTypeHasTrivialCopyConstructor;
-
-  thrust::system::detail::generic::detail::uninitialized_fill(exec, first, last, x,
-    ValueTypeHasTrivialCopyConstructor());
-} // end uninitialized_fill()
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename Size,
-         typename T>
-  ForwardIterator uninitialized_fill_n(thrust::execution_policy<DerivedPolicy> &exec,
-                                       ForwardIterator first,
-                                       Size n,
-                                       const T &x)
-{
-  typedef typename iterator_traits<ForwardIterator>::value_type ValueType;
-
-  typedef thrust::detail::has_trivial_copy_constructor<ValueType> ValueTypeHasTrivialCopyConstructor;
-
-  return thrust::system::detail::generic::detail::uninitialized_fill_n(exec, first, n, x,
-    ValueTypeHasTrivialCopyConstructor());
-} // end uninitialized_fill()
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/generic/unique.h b/compat/thrust/system/detail/generic/unique.h
deleted file mode 100644
index 57e17cafa4..0000000000
--- a/compat/thrust/system/detail/generic/unique.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/tag.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-
-template<typename DerivedPolicy,
-         typename ForwardIterator>
-ForwardIterator unique(thrust::execution_policy<DerivedPolicy> &exec,
-                       ForwardIterator first,
-                       ForwardIterator last);
-
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename BinaryPredicate>
-ForwardIterator unique(thrust::execution_policy<DerivedPolicy> &exec,
-                       ForwardIterator first,
-                       ForwardIterator last,
-                       BinaryPredicate binary_pred);
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator>
-OutputIterator unique_copy(thrust::execution_policy<DerivedPolicy> &exec,
-                           InputIterator first,
-                           InputIterator last,
-                           OutputIterator output);
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename BinaryPredicate>
-OutputIterator unique_copy(thrust::execution_policy<DerivedPolicy> &exec,
-                           InputIterator first,
-                           InputIterator last,
-                           OutputIterator output,
-                           BinaryPredicate binary_pred);
-
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/detail/generic/unique.inl>
-
diff --git a/compat/thrust/system/detail/generic/unique.inl b/compat/thrust/system/detail/generic/unique.inl
deleted file mode 100644
index 42d6b15e82..0000000000
--- a/compat/thrust/system/detail/generic/unique.inl
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file unique.inl
- *  \brief Inline file for unique.h.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/unique.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/transform.h>
-#include <thrust/unique.h>
-#include <thrust/detail/temporary_array.h>
-#include <thrust/detail/internal_functional.h>
-#include <thrust/detail/copy_if.h>
-#include <thrust/distance.h>
-#include <thrust/functional.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-
-template<typename DerivedPolicy,
-         typename ForwardIterator>
-  ForwardIterator unique(thrust::execution_policy<DerivedPolicy> &exec,
-                         ForwardIterator first,
-                         ForwardIterator last)
-{
-  typedef typename thrust::iterator_traits<ForwardIterator>::value_type InputType;
-
-  return thrust::unique(exec, first, last, thrust::equal_to<InputType>());
-} // end unique()
-
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename BinaryPredicate>
-  ForwardIterator unique(thrust::execution_policy<DerivedPolicy> &exec,
-                         ForwardIterator first,
-                         ForwardIterator last,
-                         BinaryPredicate binary_pred)
-{
-  typedef typename thrust::iterator_traits<ForwardIterator>::value_type InputType;
-  
-  thrust::detail::temporary_array<InputType,DerivedPolicy> input(exec, first, last);
-  
-  return thrust::unique_copy(exec, input.begin(), input.end(), first, binary_pred);
-} // end unique()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator>
-  OutputIterator unique_copy(thrust::execution_policy<DerivedPolicy> &exec,
-                             InputIterator first,
-                             InputIterator last,
-                             OutputIterator output)
-{
-  typedef typename thrust::iterator_value<InputIterator>::type value_type;
-  return thrust::unique_copy(exec, first,last,output,thrust::equal_to<value_type>());
-} // end unique_copy()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename BinaryPredicate>
-  OutputIterator unique_copy(thrust::execution_policy<DerivedPolicy> &exec,
-                             InputIterator first,
-                             InputIterator last,
-                             OutputIterator output,
-                             BinaryPredicate binary_pred)
-{
-  // empty sequence
-  if(first == last)
-    return output;
-  
-  thrust::detail::temporary_array<int,DerivedPolicy> stencil(exec, thrust::distance(first, last));
-  
-  // mark first element in each group
-  stencil[0] = 1; 
-  thrust::transform(exec, first, last - 1, first + 1, stencil.begin() + 1, thrust::detail::not2(binary_pred)); 
-  
-  return thrust::copy_if(exec, first, last, stencil.begin(), output, thrust::identity<int>());
-} // end unique_copy()
-
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/generic/unique_by_key.h b/compat/thrust/system/detail/generic/unique_by_key.h
deleted file mode 100644
index aa62f73e51..0000000000
--- a/compat/thrust/system/detail/generic/unique_by_key.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/tag.h>
-#include <thrust/pair.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-
-template<typename ExecutionPolicy,
-         typename ForwardIterator1,
-         typename ForwardIterator2>
-  thrust::pair<ForwardIterator1,ForwardIterator2>
-    unique_by_key(thrust::execution_policy<ExecutionPolicy> &exec,
-                  ForwardIterator1 keys_first, 
-                  ForwardIterator1 keys_last,
-                  ForwardIterator2 values_first);
-
-
-template<typename ExecutionPolicy,
-         typename ForwardIterator1,
-         typename ForwardIterator2,
-         typename BinaryPredicate>
-  thrust::pair<ForwardIterator1,ForwardIterator2>
-    unique_by_key(thrust::execution_policy<ExecutionPolicy> &exec,
-                  ForwardIterator1 keys_first, 
-                  ForwardIterator1 keys_last,
-                  ForwardIterator2 values_first,
-                  BinaryPredicate binary_pred);
-
-
-template<typename ExecutionPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator1,
-         typename OutputIterator2>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    unique_by_key_copy(thrust::execution_policy<ExecutionPolicy> &exec,
-                       InputIterator1 keys_first, 
-                       InputIterator1 keys_last,
-                       InputIterator2 values_first,
-                       OutputIterator1 keys_output,
-                       OutputIterator2 values_output);
-
-
-template<typename ExecutionPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename BinaryPredicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    unique_by_key_copy(thrust::execution_policy<ExecutionPolicy> &exec,
-                       InputIterator1 keys_first, 
-                       InputIterator1 keys_last,
-                       InputIterator2 values_first,
-                       OutputIterator1 keys_output,
-                       OutputIterator2 values_output,
-                       BinaryPredicate binary_pred);
-
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/detail/generic/unique_by_key.inl>
-
diff --git a/compat/thrust/system/detail/generic/unique_by_key.inl b/compat/thrust/system/detail/generic/unique_by_key.inl
deleted file mode 100644
index c780fa71d1..0000000000
--- a/compat/thrust/system/detail/generic/unique_by_key.inl
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/unique_by_key.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/iterator/detail/minimum_system.h>
-#include <thrust/iterator/zip_iterator.h>
-#include <thrust/transform.h>
-#include <thrust/detail/temporary_array.h>
-#include <thrust/detail/internal_functional.h>
-#include <thrust/detail/copy_if.h>
-#include <thrust/unique.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace generic
-{
-
-
-template<typename ExecutionPolicy,
-         typename ForwardIterator1,
-         typename ForwardIterator2>
-  thrust::pair<ForwardIterator1,ForwardIterator2>
-    unique_by_key(thrust::execution_policy<ExecutionPolicy> &exec,
-                  ForwardIterator1 keys_first, 
-                  ForwardIterator1 keys_last,
-                  ForwardIterator2 values_first)
-{
-  typedef typename thrust::iterator_traits<ForwardIterator1>::value_type KeyType;
-  return thrust::unique_by_key(exec, keys_first, keys_last, values_first, thrust::equal_to<KeyType>());
-} // end unique_by_key()
-
-
-template<typename ExecutionPolicy,
-         typename ForwardIterator1,
-         typename ForwardIterator2,
-         typename BinaryPredicate>
-  thrust::pair<ForwardIterator1,ForwardIterator2>
-    unique_by_key(thrust::execution_policy<ExecutionPolicy> &exec,
-                  ForwardIterator1 keys_first, 
-                  ForwardIterator1 keys_last,
-                  ForwardIterator2 values_first,
-                  BinaryPredicate binary_pred)
-{
-  typedef typename thrust::iterator_traits<ForwardIterator1>::value_type InputType1;
-  typedef typename thrust::iterator_traits<ForwardIterator2>::value_type InputType2;
-  
-  ForwardIterator2 values_last = values_first + (keys_last - keys_first);
-  
-  thrust::detail::temporary_array<InputType1,ExecutionPolicy> keys(exec, keys_first, keys_last);
-  thrust::detail::temporary_array<InputType2,ExecutionPolicy> vals(exec, values_first, values_last);
-  
-  return thrust::unique_by_key_copy(exec, keys.begin(), keys.end(), vals.begin(), keys_first, values_first, binary_pred);
-} // end unique_by_key()
-
-
-template<typename ExecutionPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator1,
-         typename OutputIterator2>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    unique_by_key_copy(thrust::execution_policy<ExecutionPolicy> &exec,
-                       InputIterator1 keys_first, 
-                       InputIterator1 keys_last,
-                       InputIterator2 values_first,
-                       OutputIterator1 keys_output,
-                       OutputIterator2 values_output)
-{
-  typedef typename thrust::iterator_traits<InputIterator1>::value_type KeyType;
-  return thrust::unique_by_key_copy(exec, keys_first, keys_last, values_first, keys_output, values_output, thrust::equal_to<KeyType>());
-} // end unique_by_key_copy()
-
-
-template<typename ExecutionPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename BinaryPredicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    unique_by_key_copy(thrust::execution_policy<ExecutionPolicy> &exec,
-                       InputIterator1 keys_first, 
-                       InputIterator1 keys_last,
-                       InputIterator2 values_first,
-                       OutputIterator1 keys_output,
-                       OutputIterator2 values_output,
-                       BinaryPredicate binary_pred)
-{
-  typedef typename thrust::iterator_traits<InputIterator1>::difference_type difference_type;
-  
-  // empty sequence
-  if(keys_first == keys_last)
-    return thrust::make_pair(keys_output, values_output);
-  
-  difference_type n = thrust::distance(keys_first, keys_last);
-  
-  thrust::detail::temporary_array<int,ExecutionPolicy> stencil(exec,n);
-  
-  // mark first element in each group
-  stencil[0] = 1; 
-  thrust::transform(exec, keys_first, keys_last - 1, keys_first + 1, stencil.begin() + 1, thrust::detail::not2(binary_pred)); 
-  
-  thrust::zip_iterator< thrust::tuple<OutputIterator1, OutputIterator2> > result =
-    thrust::copy_if(exec,
-                    thrust::make_zip_iterator(thrust::make_tuple(keys_first, values_first)),
-                    thrust::make_zip_iterator(thrust::make_tuple(keys_first, values_first)) + n,
-                    stencil.begin(),
-                    thrust::make_zip_iterator(thrust::make_tuple(keys_output, values_output)),
-                    thrust::identity<int>());
-  
-  difference_type output_size = result - thrust::make_zip_iterator(thrust::make_tuple(keys_output, values_output));
-                                  
-  return thrust::make_pair(keys_output + output_size, values_output + output_size);
-} // end unique_by_key_copy()
-
-
-} // end namespace generic
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/internal/decompose.h b/compat/thrust/system/detail/internal/decompose.h
deleted file mode 100644
index dea806d69c..0000000000
--- a/compat/thrust/system/detail/internal/decompose.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace internal
-{
-
-  template <typename IndexType>
-    class index_range
-    {
-      public:
-        typedef IndexType index_type;
-
-        __host__ __device__
-          index_range(index_type begin, index_type end) : m_begin(begin), m_end(end) {}
-
-        __host__ __device__
-          index_type begin(void) const { return m_begin; }
-
-        __host__ __device__
-          index_type end(void)   const { return m_end; }
-
-        __host__ __device__
-          index_type size(void)  const { return m_end - m_begin; }
-
-      private:
-        index_type m_begin;
-        index_type m_end;
-    };
-
-  template <typename IndexType>
-    class uniform_decomposition
-    {
-      public:
-        typedef IndexType               index_type;
-        typedef index_range<index_type> range_type;
-
-        uniform_decomposition(index_type N, index_type granularity, index_type max_intervals)
-          : m_N(N),
-	    m_intervals((N + granularity - 1) / granularity),
-	    m_threshold(0),
-	    m_small_interval(granularity),
-	    m_large_interval(0)
-        {
-	  if(m_intervals > max_intervals)
-          {
-	    m_small_interval = granularity * (m_intervals / max_intervals);
-	    m_large_interval = m_small_interval + granularity;
-	    m_threshold      = m_intervals % max_intervals;
-	    m_intervals      = max_intervals;
-	  }
-        }
-
-        __host__ __device__
-          index_range<index_type> operator[](const index_type& i) const
-          {
-            if (i < m_threshold)
-            {
-              index_type begin = m_large_interval * i;
-              index_type end   = begin + m_large_interval;
-              return range_type(begin, end);
-            }
-            else
-            {
-              index_type begin = m_large_interval * m_threshold + m_small_interval * (i - m_threshold);
-              index_type end   = (begin + m_small_interval < m_N) ? begin + m_small_interval : m_N;
-              return range_type(begin, end);
-            }
-          }
-
-        __host__ __device__
-          index_type size(void) const
-          {
-            return m_intervals;
-          }
-
-      private:
-
-        index_type m_N;
-        index_type m_intervals;
-        index_type m_threshold;
-        index_type m_small_interval;
-        index_type m_large_interval;
-    };
-
-
-} // end namespace internal
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/internal/scalar/adjacent_difference.h b/compat/thrust/system/detail/internal/scalar/adjacent_difference.h
deleted file mode 100644
index d1a95aeec4..0000000000
--- a/compat/thrust/system/detail/internal/scalar/adjacent_difference.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file adjacent_difference.h
- *  \brief Sequential implementation of adjacent_difference.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/iterator/iterator_traits.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace internal
-{
-namespace scalar
-{
-
-template <typename InputIterator,
-          typename OutputIterator,
-          typename BinaryFunction>
-OutputIterator adjacent_difference(InputIterator first,
-                                   InputIterator last,
-                                   OutputIterator result,
-                                   BinaryFunction binary_op)
-{
-  typedef typename thrust::iterator_traits<InputIterator>::value_type InputType;
-
-  if (first == last)
-    return result;
-
-  InputType curr = *first;
-
-  *result = curr;
-
-  while (++first != last)
-  {
-    InputType next = *first;
-    *(++result) = binary_op(next, curr);
-    curr = next;
-  }
-
-  return ++result;
-}
-
-} // end namespace scalar
-} // end namespace internal
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/internal/scalar/binary_search.h b/compat/thrust/system/detail/internal/scalar/binary_search.h
deleted file mode 100644
index c3ac49fbd3..0000000000
--- a/compat/thrust/system/detail/internal/scalar/binary_search.h
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file binary_search.h
- *  \brief Sequential implementation of binary search algorithms.
- */
-
-#pragma once
-
-#include <thrust/advance.h>
-#include <thrust/distance.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/detail/function.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace internal
-{
-namespace scalar
-{
-
-template <typename ForwardIterator,
-          typename T,
-          typename StrictWeakOrdering>
-ForwardIterator lower_bound(ForwardIterator first,
-                            ForwardIterator last,
-                            const T& val,
-                            StrictWeakOrdering comp)
-{
-  // wrap comp
-  thrust::detail::host_function<
-    StrictWeakOrdering,
-    bool
-  > wrapped_comp(comp);
-
-  typedef typename thrust::iterator_difference<ForwardIterator>::type difference_type;
-
-  difference_type len = thrust::distance(first, last);
-
-  while(len > 0)
-  {
-    difference_type half = len >> 1;
-    ForwardIterator middle = first;
-
-    thrust::advance(middle, half);
-
-    if(wrapped_comp(*middle, val))
-    {
-      first = middle;
-      ++first;
-      len = len - half - 1;
-    }
-    else
-    {
-      len = half;
-    }
-  }
-
-  return first;
-}
-
-
-template <typename ForwardIterator, typename T, typename StrictWeakOrdering>
-ForwardIterator upper_bound(ForwardIterator first,
-                            ForwardIterator last,
-                            const T& val, 
-                            StrictWeakOrdering comp)
-{
-  // wrap comp
-  thrust::detail::host_function<
-    StrictWeakOrdering,
-    bool
-  > wrapped_comp(comp);
-
-  typedef typename thrust::iterator_difference<ForwardIterator>::type difference_type;
-
-  difference_type len = thrust::distance(first, last);
-
-  while(len > 0)
-  {
-    difference_type half = len >> 1;
-    ForwardIterator middle = first;
-
-    thrust::advance(middle, half);
-
-    if(wrapped_comp(val, *middle))
-    {
-      len = half;
-    }
-    else
-    {
-      first = middle;
-      ++first;
-      len = len - half - 1;
-    }
-  }
-
-  return first;
-}
-
-template <typename ForwardIterator,
-          typename T,
-          typename StrictWeakOrdering>
-bool binary_search(ForwardIterator first,
-                   ForwardIterator last,
-                   const T& val, 
-                   StrictWeakOrdering comp)
-{
-  ForwardIterator iter = thrust::system::detail::internal::scalar::lower_bound(first, last, val, comp);
-
-  // wrap comp
-  thrust::detail::host_function<
-    StrictWeakOrdering,
-    bool
-  > wrapped_comp(comp);
-
-  return iter != last && !wrapped_comp(val,*iter);
-}
-
-} // end namespace scalar
-} // end namespace internal
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/internal/scalar/copy.h b/compat/thrust/system/detail/internal/scalar/copy.h
deleted file mode 100644
index 42cb385402..0000000000
--- a/compat/thrust/system/detail/internal/scalar/copy.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-/*! \file copy.h
- *  \brief Sequential implementations of copy algorithms.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace internal
-{
-namespace scalar
-{
-
-template<typename InputIterator,
-         typename OutputIterator>
-  OutputIterator copy(InputIterator first,
-                      InputIterator last,
-                      OutputIterator result);
-
-template<typename InputIterator,
-         typename Size,
-         typename OutputIterator>
-  OutputIterator copy_n(InputIterator first,
-                        Size n,
-                        OutputIterator result);
-
-} // end namespace scalar
-} // end namespace internal
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/detail/internal/scalar/copy.inl>
-
diff --git a/compat/thrust/system/detail/internal/scalar/copy.inl b/compat/thrust/system/detail/internal/scalar/copy.inl
deleted file mode 100644
index 8c9f5c2825..0000000000
--- a/compat/thrust/system/detail/internal/scalar/copy.inl
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/dispatch/is_trivial_copy.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/system/detail/internal/scalar/general_copy.h>
-#include <thrust/system/detail/internal/scalar/trivial_copy.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/detail/type_traits/pointer_traits.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace internal
-{
-namespace scalar
-{
-namespace copy_detail
-{
-
-
-// returns the raw pointer associated with a Pointer-like thing
-template<typename Pointer>
-  typename thrust::detail::pointer_traits<Pointer>::raw_pointer
-    get(Pointer ptr)
-{
-  return thrust::detail::pointer_traits<Pointer>::get(ptr);
-}
-
-
-template<typename InputIterator,
-         typename OutputIterator>
-  OutputIterator copy(InputIterator first,
-                      InputIterator last,
-                      OutputIterator result,
-                      thrust::detail::true_type)  // is_trivial_copy
-{
-  typedef typename thrust::iterator_difference<InputIterator>::type Size;
-
-  const Size n = last - first;
-  thrust::system::detail::internal::scalar::trivial_copy_n(get(&*first), n, get(&*result));
-  return result + n;
-} // end copy()
-
-
-template<typename InputIterator,
-         typename OutputIterator>
-  OutputIterator copy(InputIterator first,
-                      InputIterator last,
-                      OutputIterator result,
-                      thrust::detail::false_type)  // is_trivial_copy
-{
-  return thrust::system::detail::internal::scalar::general_copy(first,last,result);
-} // end copy()
-
-
-template<typename InputIterator,
-         typename Size,
-         typename OutputIterator>
-  OutputIterator copy_n(InputIterator first,
-                        Size n,
-                        OutputIterator result,
-                        thrust::detail::true_type)  // is_trivial_copy
-{
-  thrust::system::detail::internal::scalar::trivial_copy_n(get(&*first), n, get(&*result));
-  return result + n;
-} // end copy_n()
-
-
-template<typename InputIterator,
-         typename Size,
-         typename OutputIterator>
-  OutputIterator copy_n(InputIterator first,
-                        Size n,
-                        OutputIterator result,
-                        thrust::detail::false_type)  // is_trivial_copy
-{
-  return thrust::system::detail::internal::scalar::general_copy_n(first,n,result);
-} // end copy_n()
-
-} // end namespace copy_detail
-
-
-template<typename InputIterator,
-         typename OutputIterator>
-  OutputIterator copy(InputIterator first,
-                      InputIterator last,
-                      OutputIterator result)
-{
-  return thrust::system::detail::internal::scalar::copy_detail::copy(first, last, result,
-    typename thrust::detail::dispatch::is_trivial_copy<InputIterator,OutputIterator>::type());
-} // end copy()
-
-
-template<typename InputIterator,
-         typename Size,
-         typename OutputIterator>
-  OutputIterator copy_n(InputIterator first,
-                        Size n,
-                        OutputIterator result)
-{
-  return thrust::system::detail::internal::scalar::copy_detail::copy_n(first, n, result,
-    typename thrust::detail::dispatch::is_trivial_copy<InputIterator,OutputIterator>::type());
-} // end copy_n()
-
-} // end namespace scalar
-} // end namespace internal
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/internal/scalar/copy_backward.h b/compat/thrust/system/detail/internal/scalar/copy_backward.h
deleted file mode 100644
index 36f8f66e44..0000000000
--- a/compat/thrust/system/detail/internal/scalar/copy_backward.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace internal
-{
-namespace scalar
-{
-
-template <typename BidirectionalIterator1,
-          typename BidirectionalIterator2>
-BidirectionalIterator2 copy_backward(BidirectionalIterator1 first, 
-                                     BidirectionalIterator1 last, 
-                                     BidirectionalIterator2 result)
-{
-  while (first != last)
-  {
-    --last;
-    --result;
-    *result = *last;
-  }
-
-  return result;
-}
-
-} // end namespace scalar
-} // end namespace internal
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/internal/scalar/copy_if.h b/compat/thrust/system/detail/internal/scalar/copy_if.h
deleted file mode 100644
index 67f9402335..0000000000
--- a/compat/thrust/system/detail/internal/scalar/copy_if.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-/*! \file copy_if.h
- *  \brief Sequential implementation of copy_if.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/function.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace internal
-{
-namespace scalar
-{
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename Predicate>
-  OutputIterator copy_if(InputIterator1 first,
-                         InputIterator1 last,
-                         InputIterator2 stencil,
-                         OutputIterator result,
-                         Predicate pred)
-{
-  thrust::detail::host_function<Predicate,bool> wrapped_pred(pred);
-
-  while(first != last)
-  {
-    if(wrapped_pred(*stencil))
-    {
-      *result = *first;
-      ++result;
-    } // end if
-
-    ++first;
-    ++stencil;
-  } // end while
-
-  return result;
-} // end copy_if()
-
-} // end namespace scalar
-} // end namespace internal
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/internal/scalar/extrema.h b/compat/thrust/system/detail/internal/scalar/extrema.h
deleted file mode 100644
index ebea756d8c..0000000000
--- a/compat/thrust/system/detail/internal/scalar/extrema.h
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file extrema.h
- *  \brief Sequential implementations of extrema functions.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/pair.h>
-#include <thrust/detail/function.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace internal
-{
-namespace scalar
-{
-
-template <typename ForwardIterator,
-          typename BinaryPredicate>
-ForwardIterator min_element(ForwardIterator first, 
-                            ForwardIterator last,
-                            BinaryPredicate comp)
-{
-  // wrap comp
-  thrust::detail::host_function<
-    BinaryPredicate,
-    bool
-  > wrapped_comp(comp);
-
-  ForwardIterator imin = first;
-
-  for (; first != last; first++)
-  {
-    if (wrapped_comp(*first, *imin))
-    {
-      imin = first;
-    }
-  }
-
-  return imin;
-}
-
-
-template <typename ForwardIterator,
-          typename BinaryPredicate>
-ForwardIterator max_element(ForwardIterator first, 
-                            ForwardIterator last,
-                            BinaryPredicate comp)
-{
-  // wrap comp
-  thrust::detail::host_function<
-    BinaryPredicate,
-    bool
-  > wrapped_comp(comp);
-
-  ForwardIterator imax = first;
-
-  for (; first != last; first++)
-  {
-    if (wrapped_comp(*imax, *first))
-    {
-      imax = first;
-    }
-  }
-
-  return imax;
-}
-
-
-template <typename ForwardIterator,
-          typename BinaryPredicate>
-thrust::pair<ForwardIterator,ForwardIterator> minmax_element(ForwardIterator first, 
-                                                             ForwardIterator last,
-                                                             BinaryPredicate comp)
-{
-  // wrap comp
-  thrust::detail::host_function<
-    BinaryPredicate,
-    bool
-  > wrapped_comp(comp);
-  
-  ForwardIterator imin = first;
-  ForwardIterator imax = first;
-
-  for (; first != last; first++)
-  {
-    if (wrapped_comp(*first, *imin))
-    {
-      imin = first;
-    }
-
-    if (wrapped_comp(*imax, *first))
-    {
-      imax = first;
-    }
-  }
-
-  return thrust::make_pair(imin, imax);
-}
-
-} // end namespace scalar
-} // end namespace internal
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/internal/scalar/find.h b/compat/thrust/system/detail/internal/scalar/find.h
deleted file mode 100644
index 6b2502199e..0000000000
--- a/compat/thrust/system/detail/internal/scalar/find.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file find.h
- *  \brief Sequential implementation of find_if. 
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/function.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace internal
-{
-namespace scalar
-{
-
-template <typename InputIterator,
-          typename Predicate>
-InputIterator find_if(InputIterator first,
-                      InputIterator last,
-                      Predicate pred)
-{
-  // wrap pred
-  thrust::detail::host_function<
-    Predicate,
-    bool
-  > wrapped_pred(pred);
-
-  while(first != last)
-  {
-    if (wrapped_pred(*first))
-      return first;
-
-    ++first;
-  }
-
-  // return first so zip_iterator works correctly
-  return first;
-}
-
-} // end namespace scalar
-} // end namespace internal
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/internal/scalar/for_each.h b/compat/thrust/system/detail/internal/scalar/for_each.h
deleted file mode 100644
index 4e31d9183b..0000000000
--- a/compat/thrust/system/detail/internal/scalar/for_each.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file for_each.h
- *  \brief Sequential implementations of for_each functions.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/function.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace internal
-{
-namespace scalar
-{
-
-template<typename InputIterator,
-         typename UnaryFunction>
-InputIterator for_each(InputIterator first,
-                       InputIterator last,
-                       UnaryFunction f)
-{
-  // wrap f
-  thrust::detail::host_function<
-    UnaryFunction,
-    void
-  > wrapped_f(f);
-
-  for(; first != last; ++first)
-  {
-    wrapped_f(*first);
-  }
-
-  return first;
-} // end for_each()
-
-template<typename InputIterator,
-         typename Size,
-         typename UnaryFunction>
-InputIterator for_each_n(InputIterator first,
-                         Size n,
-                         UnaryFunction f)
-{
-  // wrap f
-  thrust::detail::host_function<
-    UnaryFunction,
-    void
-  > wrapped_f(f);
-
-  for(Size i = 0; i != n; i++)
-  {
-    // we can dereference an OutputIterator if f does not
-    // try to use the reference for anything besides assignment
-    wrapped_f(*first);
-    ++first;
-  }
-
-  return first;
-} // end for_each_n()
-
-} // end namespace scalar
-} // end namespace internal
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/internal/scalar/general_copy.h b/compat/thrust/system/detail/internal/scalar/general_copy.h
deleted file mode 100644
index aae061d31d..0000000000
--- a/compat/thrust/system/detail/internal/scalar/general_copy.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-/*! \file general_copy.h
- *  \brief Sequential copy algorithms for general iterators.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace internal
-{
-namespace scalar
-{
-
-template<typename InputIterator,
-         typename OutputIterator>
-  OutputIterator general_copy(InputIterator first,
-                              InputIterator last,
-                              OutputIterator result)
-{
-  for(; first != last; ++first, ++result)
-    *result = *first;
-  return result;
-} // end general_copy()
-
-
-template<typename InputIterator,
-         typename Size,
-         typename OutputIterator>
-  OutputIterator general_copy_n(InputIterator first,
-                                Size n,
-                                OutputIterator result)
-{
-  for(; n > Size(0); ++first, ++result, --n)
-    *result = *first;
-  return result;
-} // end general_copy_n()
-
-} // end namespace scalar
-} // end namespace internal
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/internal/scalar/insertion_sort.h b/compat/thrust/system/detail/internal/scalar/insertion_sort.h
deleted file mode 100644
index 5949ce7a65..0000000000
--- a/compat/thrust/system/detail/internal/scalar/insertion_sort.h
+++ /dev/null
@@ -1,149 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/detail/function.h>
-#include <thrust/system/detail/internal/scalar/copy_backward.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace internal
-{
-namespace scalar
-{
-
-template <typename RandomAccessIterator,
-          typename StrictWeakOrdering>
-void insertion_sort(RandomAccessIterator first,
-                    RandomAccessIterator last,
-                    StrictWeakOrdering comp)
-{
-  typedef typename thrust::iterator_value<RandomAccessIterator>::type value_type;
-
-  if (first == last) return;
-
-  // wrap comp
-  thrust::detail::host_function<
-    StrictWeakOrdering,
-    bool
-  > wrapped_comp(comp);
-
-  for(RandomAccessIterator i = first + 1; i != last; ++i)
-  {
-    value_type tmp = *i;
-
-    if (wrapped_comp(tmp, *first))
-    {
-      // tmp is the smallest value encountered so far
-      thrust::system::detail::internal::scalar::copy_backward(first, i, i + 1);
-
-      *first = tmp;
-    }
-    else
-    {
-      // tmp is not the smallest value, can avoid checking for j == first
-      RandomAccessIterator j = i;
-      RandomAccessIterator k = i - 1;
-
-      while(wrapped_comp(tmp, *k))
-      {
-        *j = *k;
-        j = k;
-        --k;
-      }
-
-      *j = tmp;
-    }
-  }
-}
-
-template <typename RandomAccessIterator1,
-          typename RandomAccessIterator2,
-          typename StrictWeakOrdering>
-void insertion_sort_by_key(RandomAccessIterator1 first1,
-                           RandomAccessIterator1 last1,
-                           RandomAccessIterator2 first2,
-                           StrictWeakOrdering comp)
-{
-  typedef typename thrust::iterator_value<RandomAccessIterator1>::type value_type1;
-  typedef typename thrust::iterator_value<RandomAccessIterator2>::type value_type2;
-
-  if (first1 == last1) return;
-
-  // wrap comp
-  thrust::detail::host_function<
-    StrictWeakOrdering,
-    bool
-  > wrapped_comp(comp);
-
-  RandomAccessIterator1 i1 = first1 + 1;
-  RandomAccessIterator2 i2 = first2 + 1;
-
-  for(; i1 != last1; ++i1, ++i2)
-  {
-    value_type1 tmp1 = *i1;
-    value_type2 tmp2 = *i2;
-
-    if (wrapped_comp(tmp1, *first1))
-    {
-      // tmp is the smallest value encountered so far
-      thrust::system::detail::internal::scalar::copy_backward(first1, i1, i1 + 1);
-      thrust::system::detail::internal::scalar::copy_backward(first2, i2, i2 + 1);
-
-      *first1 = tmp1;
-      *first2 = tmp2;
-    }
-    else
-    {
-      // tmp is not the smallest value, can avoid checking for j == first
-      RandomAccessIterator1 j1 = i1;
-      RandomAccessIterator1 k1 = i1 - 1;
-
-      RandomAccessIterator2 j2 = i2;
-      RandomAccessIterator2 k2 = i2 - 1;
-
-      while(wrapped_comp(tmp1, *k1))
-      {
-        *j1 = *k1;
-        *j2 = *k2;
-
-        j1 = k1;
-        j2 = k2;
-
-        --k1;
-        --k2;
-      }
-
-      *j1 = tmp1;
-      *j2 = tmp2;
-    }
-  }
-}
-
-} // end namespace scalar
-} // end namespace internal
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/internal/scalar/merge.h b/compat/thrust/system/detail/internal/scalar/merge.h
deleted file mode 100644
index c02fca44b6..0000000000
--- a/compat/thrust/system/detail/internal/scalar/merge.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file merge.h
- *  \brief Sequential implementation of merge algorithms.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace internal
-{
-namespace scalar
-{
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename StrictWeakOrdering>
-OutputIterator merge(InputIterator1 first1,
-                     InputIterator1 last1,
-                     InputIterator2 first2,
-                     InputIterator2 last2,
-                     OutputIterator result,
-                     StrictWeakOrdering comp);
-
-template <typename InputIterator1,
-          typename InputIterator2,
-          typename InputIterator3,
-          typename InputIterator4,
-          typename OutputIterator1,
-          typename OutputIterator2,
-          typename StrictWeakOrdering>
-thrust::pair<OutputIterator1,OutputIterator2>
-  merge_by_key(InputIterator1 keys_first1,
-               InputIterator1 keys_last1,
-               InputIterator2 keys_first2,
-               InputIterator2 keys_last2,
-               InputIterator3 values_first1,
-               InputIterator4 values_first2,
-               OutputIterator1 keys_result,
-               OutputIterator2 values_result,
-               StrictWeakOrdering comp);
-
-} // end namespace scalar
-} // end namespace internal
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/detail/internal/scalar/merge.inl>
-
diff --git a/compat/thrust/system/detail/internal/scalar/merge.inl b/compat/thrust/system/detail/internal/scalar/merge.inl
deleted file mode 100644
index a7c2a393c4..0000000000
--- a/compat/thrust/system/detail/internal/scalar/merge.inl
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/iterator/iterator_traits.h>
-
-#include <thrust/system/detail/internal/scalar/copy.h>
-#include <thrust/detail/function.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace internal
-{
-namespace scalar
-{
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename StrictWeakOrdering>
-OutputIterator merge(InputIterator1 first1,
-                     InputIterator1 last1,
-                     InputIterator2 first2,
-                     InputIterator2 last2,
-                     OutputIterator result,
-                     StrictWeakOrdering comp)
-{
-  // wrap comp
-  thrust::detail::host_function<
-    StrictWeakOrdering,
-    bool
-  > wrapped_comp(comp);
-
-  while(first1 != last1 && first2 != last2)
-  {
-    if(wrapped_comp(*first2, *first1))
-    {
-      *result = *first2;
-      ++first2;
-    } // end if
-    else
-    {
-      *result = *first1;
-      ++first1;
-    } // end else
-
-    ++result;
-  } // end while
-
-  return thrust::system::detail::internal::scalar::copy(first2, last2, thrust::system::detail::internal::scalar::copy(first1, last1, result));
-} // end merge()
-
-
-template <typename InputIterator1,
-          typename InputIterator2,
-          typename InputIterator3,
-          typename InputIterator4,
-          typename OutputIterator1,
-          typename OutputIterator2,
-          typename StrictWeakOrdering>
-thrust::pair<OutputIterator1,OutputIterator2>
-  merge_by_key(InputIterator1 keys_first1,
-               InputIterator1 keys_last1,
-               InputIterator2 keys_first2,
-               InputIterator2 keys_last2,
-               InputIterator3 values_first1,
-               InputIterator4 values_first2,
-               OutputIterator1 keys_result,
-               OutputIterator2 values_result,
-               StrictWeakOrdering comp)
-{
-  // wrap comp
-  thrust::detail::host_function<
-    StrictWeakOrdering,
-    bool
-  > wrapped_comp(comp);
-
-  while(keys_first1 != keys_last1 && keys_first2 != keys_last2)
-  {
-    if(!wrapped_comp(*keys_first2, *keys_first1))
-    {
-      // *keys_first1 <= *keys_first2
-      *keys_result   = *keys_first1;
-      *values_result = *values_first1;
-      ++keys_first1;
-      ++values_first1;
-    }
-    else
-    {
-      // *keys_first1 > keys_first2
-      *keys_result   = *keys_first2;
-      *values_result = *values_first2;
-      ++keys_first2;
-      ++values_first2;
-    }
-
-    ++keys_result;
-    ++values_result;
-  }
-
-  while(keys_first1 != keys_last1)
-  {
-    *keys_result   = *keys_first1;
-    *values_result = *values_first1;
-    ++keys_first1;
-    ++values_first1;
-    ++keys_result;
-    ++values_result;
-  }
-
-  while(keys_first2 != keys_last2)
-  {
-    *keys_result   = *keys_first2;
-    *values_result = *values_first2;
-    ++keys_first2;
-    ++values_first2;
-    ++keys_result;
-    ++values_result;
-  }
-
-  return thrust::make_pair(keys_result, values_result);
-}
-
-} // end namespace scalar
-} // end namespace internal
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/internal/scalar/partition.h b/compat/thrust/system/detail/internal/scalar/partition.h
deleted file mode 100644
index 7ba677ef22..0000000000
--- a/compat/thrust/system/detail/internal/scalar/partition.h
+++ /dev/null
@@ -1,262 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file partition.h
- *  \brief Sequential implementations of partition functions.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/pair.h>
-#include <thrust/detail/temporary_array.h>
-#include <thrust/detail/function.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace internal
-{
-namespace scalar
-{
-
-template <typename ForwardIterator1,
-          typename ForwardIterator2>
-void iter_swap(ForwardIterator1 iter1, ForwardIterator2 iter2)
-{
-  // XXX this isn't correct because it doesn't use thrust::swap
-  using namespace thrust::detail;
-
-  typedef typename thrust::iterator_value<ForwardIterator1>::type T;
-
-  T temp = *iter1;
-  *iter1 = *iter2;
-  *iter2 = temp;
-}
-
-template<typename ForwardIterator,
-         typename Predicate>
-  ForwardIterator partition(ForwardIterator first,
-                            ForwardIterator last,
-                            Predicate pred)
-{
-  if (first == last)
-    return first;
-
-  // wrap pred
-  thrust::detail::host_function<
-    Predicate,
-    bool
-  > wrapped_pred(pred);
-
-  while (wrapped_pred(*first))
-  {
-    if (++first == last)
-      return first;
-  }
-
-  ForwardIterator next = first;
-
-  while (++next != last)
-  {
-    if (wrapped_pred(*next))
-    {
-      iter_swap(first, next);
-      ++first;
-    }
-  }
-
-  return first;
-}
-
-template<typename ForwardIterator,
-         typename Predicate>
-  ForwardIterator stable_partition(ForwardIterator first,
-                                   ForwardIterator last,
-                                   Predicate pred)
-{
-  // wrap pred
-  thrust::detail::host_function<
-    Predicate,
-    bool
-  > wrapped_pred(pred);
-
-  // XXX the type of exec should be:
-  //     typedef decltype(select_system(first, last)) system;
-  typedef typename thrust::iterator_system<ForwardIterator>::type ExecutionPolicy;
-  typedef typename thrust::iterator_value<ForwardIterator>::type T;
-
-  typedef thrust::detail::temporary_array<T,ExecutionPolicy> TempRange;
-  typedef typename TempRange::iterator                       TempIterator;
-
-  // XXX presumes ExecutionPolicy is default constructible
-  ExecutionPolicy exec;
-  TempRange temp(exec, first, last);
-
-  for(TempIterator iter = temp.begin(); iter != temp.end(); ++iter)
-  {
-    if (wrapped_pred(*iter))
-    {
-      *first = *iter;
-      ++first;
-    }
-  }
-
-  ForwardIterator middle = first;
-
-  for(TempIterator iter = temp.begin(); iter != temp.end(); ++iter)
-  {
-    if (!wrapped_pred(*iter))
-    {
-      *first = *iter;
-      ++first;
-    }
-  }
-
-  return middle;
-}
-
-template<typename ForwardIterator,
-         typename InputIterator,
-         typename Predicate>
-  ForwardIterator stable_partition(ForwardIterator first,
-                                   ForwardIterator last,
-                                   InputIterator stencil,
-                                   Predicate pred)
-{
-  // wrap pred
-  thrust::detail::host_function<
-    Predicate,
-    bool
-  > wrapped_pred(pred);
-
-  // XXX the type of exec should be:
-  //     typedef decltype(select_system(first, stencil)) system;
-  typedef typename thrust::iterator_system<ForwardIterator>::type ExecutionPolicy;
-  typedef typename thrust::iterator_value<ForwardIterator>::type T;
-
-  typedef thrust::detail::temporary_array<T,ExecutionPolicy> TempRange;
-  typedef typename TempRange::iterator                       TempIterator;
-
-  // XXX presumes ExecutionPolicy is default constructible
-  ExecutionPolicy exec;
-  TempRange temp(exec, first, last);
-
-  InputIterator stencil_iter = stencil;
-  for(TempIterator iter = temp.begin(); iter != temp.end(); ++iter, ++stencil_iter)
-  {
-    if (wrapped_pred(*stencil_iter))
-    {
-      *first = *iter;
-      ++first;
-    }
-  }
-
-  ForwardIterator middle = first;
-  stencil_iter = stencil;
-
-  for(TempIterator iter = temp.begin(); iter != temp.end(); ++iter, ++stencil_iter)
-  {
-    if (!wrapped_pred(*stencil_iter))
-    {
-      *first = *iter;
-      ++first;
-    }
-  }
-
-  return middle;
-}
-
-template<typename InputIterator,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename Predicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    stable_partition_copy(InputIterator first,
-                          InputIterator last,
-                          OutputIterator1 out_true,
-                          OutputIterator2 out_false,
-                          Predicate pred)
-{
-  // wrap pred
-  thrust::detail::host_function<
-    Predicate,
-    bool
-  > wrapped_pred(pred);
-
-  for(; first != last; ++first)
-  {
-    if(wrapped_pred(*first))
-    {
-      *out_true = *first;
-      ++out_true;
-    } // end if
-    else
-    {
-      *out_false = *first;
-      ++out_false;
-    } // end else
-  }
-
-  return thrust::make_pair(out_true, out_false);
-}
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename Predicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    stable_partition_copy(InputIterator1 first,
-                          InputIterator1 last,
-                          InputIterator2 stencil,
-                          OutputIterator1 out_true,
-                          OutputIterator2 out_false,
-                          Predicate pred)
-{
-  // wrap pred
-  thrust::detail::host_function<
-    Predicate,
-    bool
-  > wrapped_pred(pred);
-
-  for(; first != last; ++first, ++stencil)
-  {
-    if(wrapped_pred(*stencil))
-    {
-      *out_true = *first;
-      ++out_true;
-    } // end if
-    else
-    {
-      *out_false = *first;
-      ++out_false;
-    } // end else
-  }
-
-  return thrust::make_pair(out_true, out_false);
-}
-
-} // end namespace scalar
-} // end namespace internal
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/internal/scalar/reduce.h b/compat/thrust/system/detail/internal/scalar/reduce.h
deleted file mode 100644
index 7ad430ea50..0000000000
--- a/compat/thrust/system/detail/internal/scalar/reduce.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file reduce.h
- *  \brief Sequential implementation of reduce algorithm.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/function.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace internal
-{
-namespace scalar
-{
-
-template<typename InputIterator, 
-         typename OutputType,
-         typename BinaryFunction>
-  OutputType reduce(InputIterator begin,
-                    InputIterator end,
-                    OutputType init,
-                    BinaryFunction binary_op)
-{
-  // wrap binary_op
-  thrust::detail::host_function<
-    BinaryFunction,
-    OutputType
-  > wrapped_binary_op(binary_op);
-
-  // initialize the result
-  OutputType result = init;
-
-  while(begin != end)
-  {
-    result = wrapped_binary_op(result, *begin);
-    ++begin;
-  } // end while
-
-  return result;
-}
-
-} // end namespace scalar
-} // end namespace internal
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/internal/scalar/reduce_by_key.h b/compat/thrust/system/detail/internal/scalar/reduce_by_key.h
deleted file mode 100644
index eeacb9dd49..0000000000
--- a/compat/thrust/system/detail/internal/scalar/reduce_by_key.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/pair.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/detail/type_traits/algorithm/intermediate_type_from_function_and_iterators.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace internal
-{
-namespace scalar
-{
-
-template <typename InputIterator1,
-          typename InputIterator2,
-          typename OutputIterator1,
-          typename OutputIterator2,
-          typename BinaryPredicate,
-          typename BinaryFunction>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    reduce_by_key(InputIterator1 keys_first, 
-                  InputIterator1 keys_last,
-                  InputIterator2 values_first,
-                  OutputIterator1 keys_output,
-                  OutputIterator2 values_output,
-                  BinaryPredicate binary_pred,
-                  BinaryFunction binary_op)
-{
-  typedef typename thrust::iterator_traits<InputIterator1>::value_type  InputKeyType;
-  typedef typename thrust::iterator_traits<InputIterator2>::value_type  InputValueType;
-
-  typedef typename thrust::detail::intermediate_type_from_function_and_iterators<
-    InputIterator2,
-    OutputIterator2,
-    BinaryFunction
-  >::type TemporaryType;
-
-  if(keys_first != keys_last)
-  {
-    InputKeyType  temp_key   = *keys_first;
-    TemporaryType temp_value = *values_first;
-
-    for(++keys_first, ++values_first;
-        keys_first != keys_last;
-        ++keys_first, ++values_first)
-    {
-      InputKeyType    key  = *keys_first;
-      InputValueType value = *values_first;
-
-      if (binary_pred(temp_key, key))
-      {
-        temp_value = binary_op(temp_value, value);
-      }
-      else
-      {
-        *keys_output   = temp_key;
-        *values_output = temp_value;
-
-        ++keys_output;
-        ++values_output;
-
-        temp_key   = key;
-        temp_value = value;
-      }
-    }
-
-    *keys_output   = temp_key;
-    *values_output = temp_value;
-
-    ++keys_output;
-    ++values_output;
-  }
-
-  return thrust::make_pair(keys_output, values_output);
-}
-
-} // end namespace scalar
-} // end namespace internal
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/internal/scalar/remove.h b/compat/thrust/system/detail/internal/scalar/remove.h
deleted file mode 100644
index 2360019f11..0000000000
--- a/compat/thrust/system/detail/internal/scalar/remove.h
+++ /dev/null
@@ -1,185 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file remove.h
- *  \brief Sequential implementations of remove functions.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/function.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace internal
-{
-namespace scalar
-{
-
-template<typename ForwardIterator,
-         typename Predicate>
-  ForwardIterator remove_if(ForwardIterator first,
-                            ForwardIterator last,
-                            Predicate pred)
-{
-  // wrap pred
-  thrust::detail::host_function<
-    Predicate,
-    bool
-  > wrapped_pred(pred);
-
-  // advance iterators until wrapped_pred(*first) is true or we reach the end of input
-  while(first != last && !wrapped_pred(*first))
-    ++first;
-
-  if(first == last)
-    return first;
-
-  // result always trails first 
-  ForwardIterator result = first;
-
-  ++first;
-
-  while(first != last)
-  {
-    if(!wrapped_pred(*first))
-    {
-      *result = *first;
-      ++result;
-    }
-    ++first;
-  }
-
-  return result;
-}
-
-
-template<typename ForwardIterator,
-         typename InputIterator,
-         typename Predicate>
-  ForwardIterator remove_if(ForwardIterator first,
-                            ForwardIterator last,
-                            InputIterator stencil,
-                            Predicate pred)
-{
-  // wrap pred
-  thrust::detail::host_function<
-    Predicate,
-    bool
-  > wrapped_pred(pred);
-
-  // advance iterators until wrapped_pred(*stencil) is true or we reach the end of input
-  while(first != last && !wrapped_pred(*stencil))
-  {
-    ++first;
-    ++stencil;
-  }
-
-  if(first == last)
-    return first;
-
-  // result always trails first 
-  ForwardIterator result = first;
-
-  ++first;
-  ++stencil;
-
-  while(first != last)
-  {
-    if(!wrapped_pred(*stencil))
-    {
-      *result = *first;
-      ++result;
-    }
-    ++first;
-    ++stencil;
-  }
-
-  return result;
-}
-
-
-template<typename InputIterator,
-         typename OutputIterator,
-         typename Predicate>
-  OutputIterator remove_copy_if(InputIterator first,
-                                InputIterator last,
-                                OutputIterator result,
-                                Predicate pred)
-{
-  // wrap pred
-  thrust::detail::host_function<
-    Predicate,
-    bool
-  > wrapped_pred(pred);
-
-  while (first != last)
-  {
-    if (!wrapped_pred(*first))
-    {
-      *result = *first;
-      ++result;
-    }
-
-    ++first;
-  }
-
-  return result;
-}
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename Predicate>
-  OutputIterator remove_copy_if(InputIterator1 first,
-                                InputIterator1 last,
-                                InputIterator2 stencil,
-                                OutputIterator result,
-                                Predicate pred)
-{
-  // wrap pred
-  thrust::detail::host_function<
-    Predicate,
-    bool
-  > wrapped_pred(pred);
-
-  while (first != last)
-  {
-    if (!wrapped_pred(*stencil))
-    {
-      *result = *first;
-      ++result;
-    }
-
-    ++first;
-    ++stencil;
-  }
-
-  return result;
-}
-
-} // end namespace scalar
-} // end namespace internal
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/internal/scalar/scan.h b/compat/thrust/system/detail/internal/scalar/scan.h
deleted file mode 100644
index 8f41150247..0000000000
--- a/compat/thrust/system/detail/internal/scalar/scan.h
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file scan.h
- *  \brief Sequential implementations of scan functions.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cpp/detail/execution_policy.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/detail/type_traits/function_traits.h>
-#include <thrust/detail/type_traits/iterator/is_output_iterator.h>
-#include <thrust/detail/function.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace internal
-{
-namespace scalar
-{
-
-template<typename InputIterator,
-         typename OutputIterator,
-         typename BinaryFunction>
-  OutputIterator inclusive_scan(InputIterator first,
-                                InputIterator last,
-                                OutputIterator result,
-                                BinaryFunction binary_op)
-{
-  // the pseudocode for deducing the type of the temporary used below:
-  // 
-  // if BinaryFunction is AdaptableBinaryFunction
-  //   TemporaryType = AdaptableBinaryFunction::result_type
-  // else if OutputIterator is a "pure" output iterator
-  //   TemporaryType = InputIterator::value_type
-  // else
-  //   TemporaryType = OutputIterator::value_type
-  //
-  // XXX upon c++0x, TemporaryType needs to be:
-  // result_of<BinaryFunction>::type
-  
-  using namespace thrust::detail;
-
-  typedef typename eval_if<
-    has_result_type<BinaryFunction>::value,
-    result_type<BinaryFunction>,
-    eval_if<
-      is_output_iterator<OutputIterator>::value,
-      thrust::iterator_value<InputIterator>,
-      thrust::iterator_value<OutputIterator>
-    >
-  >::type ValueType;
-
-  // wrap binary_op
-  thrust::detail::host_function<
-    BinaryFunction,
-    ValueType
-  > wrapped_binary_op(binary_op);
-
-  if(first != last)
-  {
-    ValueType sum = *first;
-
-    *result = sum;
-
-    for(++first, ++result; first != last; ++first, ++result)
-      *result = sum = wrapped_binary_op(sum,*first);
-  }
-
-  return result;
-}
-
-
-template<typename InputIterator,
-         typename OutputIterator,
-         typename T,
-         typename BinaryFunction>
-  OutputIterator exclusive_scan(InputIterator first,
-                                InputIterator last,
-                                OutputIterator result,
-                                T init,
-                                BinaryFunction binary_op)
-{
-  // the pseudocode for deducing the type of the temporary used below:
-  // 
-  // if BinaryFunction is AdaptableBinaryFunction
-  //   TemporaryType = AdaptableBinaryFunction::result_type
-  // else if OutputIterator is a "pure" output iterator
-  //   TemporaryType = InputIterator::value_type
-  // else
-  //   TemporaryType = OutputIterator::value_type
-  //
-  // XXX upon c++0x, TemporaryType needs to be:
-  // result_of<BinaryFunction>::type
-
-  using namespace thrust::detail;
-
-  typedef typename eval_if<
-    has_result_type<BinaryFunction>::value,
-    result_type<BinaryFunction>,
-    eval_if<
-      is_output_iterator<OutputIterator>::value,
-      thrust::iterator_value<InputIterator>,
-      thrust::iterator_value<OutputIterator>
-    >
-  >::type ValueType;
-
-  if(first != last)
-  {
-    ValueType tmp = *first;  // temporary value allows in-situ scan
-    ValueType sum = init;
-
-    *result = sum;
-    sum = binary_op(sum, tmp);
-
-    for(++first, ++result; first != last; ++first, ++result)
-    {
-      tmp = *first;
-      *result = sum;
-      sum = binary_op(sum, tmp);
-    }
-  }
-
-  return result;
-} 
-
-} // end namespace scalar
-} // end namespace internal
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/internal/scalar/scan_by_key.h b/compat/thrust/system/detail/internal/scalar/scan_by_key.h
deleted file mode 100644
index a31fc60bab..0000000000
--- a/compat/thrust/system/detail/internal/scalar/scan_by_key.h
+++ /dev/null
@@ -1,147 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file scan_by_key.h
- *  \brief Sequential implementation of scan_by_key functions.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cpp/detail/execution_policy.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/detail/function.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace internal
-{
-namespace scalar
-{
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename BinaryPredicate,
-         typename BinaryFunction>
-  OutputIterator inclusive_scan_by_key(InputIterator1 first1,
-                                       InputIterator1 last1,
-                                       InputIterator2 first2,
-                                       OutputIterator result,
-                                       BinaryPredicate binary_pred,
-                                       BinaryFunction binary_op)
-{
-  using namespace thrust::detail;
-
-  typedef typename thrust::iterator_traits<InputIterator1>::value_type KeyType;
-  typedef typename thrust::iterator_traits<OutputIterator>::value_type ValueType;
-
-  // wrap binary_op
-  thrust::detail::host_function<
-    BinaryFunction,
-    ValueType
-  > wrapped_binary_op(binary_op);
-
-  if(first1 != last1)
-  {
-    KeyType   prev_key   = *first1;
-    ValueType prev_value = *first2;
-
-    *result = prev_value;
-
-    for(++first1, ++first2, ++result;
-        first1 != last1;
-        ++first1, ++first2, ++result)
-    {
-      KeyType key = *first1;
-
-      if (binary_pred(prev_key, key))
-        *result = prev_value = wrapped_binary_op(prev_value,*first2);
-      else
-        *result = prev_value = *first2;
-
-      prev_key = key;
-    }
-  }
-
-  return result;
-}
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename T,
-         typename BinaryPredicate,
-         typename BinaryFunction>
-  OutputIterator exclusive_scan_by_key(InputIterator1 first1,
-                                       InputIterator1 last1,
-                                       InputIterator2 first2,
-                                       OutputIterator result,
-                                       T init,
-                                       BinaryPredicate binary_pred,
-                                       BinaryFunction binary_op)
-{
-  using namespace thrust::detail;
-
-  typedef typename thrust::iterator_traits<InputIterator1>::value_type KeyType;
-  typedef typename thrust::iterator_traits<OutputIterator>::value_type ValueType;
-
-  if(first1 != last1)
-  {
-    KeyType   temp_key   = *first1;
-    ValueType temp_value = *first2;
-
-    ValueType next = init;
-
-    // first one is init
-    *result = next;
-
-    next = binary_op(next, temp_value);
-
-    for(++first1, ++first2, ++result;
-        first1 != last1;
-        ++first1, ++first2, ++result)
-    {
-      KeyType key = *first1;
-
-      // use temp to permit in-place scans
-      temp_value = *first2;
-
-      if (!binary_pred(temp_key, key))
-        next = init;  // reset sum
-
-      *result = next;  
-      next = binary_op(next, temp_value);
-
-      temp_key = key;
-    }
-  }
-
-  return result;
-}
-
-} // end namespace scalar
-} // end namespace internal
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/internal/scalar/set_operations.h b/compat/thrust/system/detail/internal/scalar/set_operations.h
deleted file mode 100644
index f85b5108af..0000000000
--- a/compat/thrust/system/detail/internal/scalar/set_operations.h
+++ /dev/null
@@ -1,208 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file set_operations.h
- *  \brief Sequential implementation of set operation functions.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/internal/scalar/copy.h>
-#include <thrust/detail/function.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace internal
-{
-namespace scalar
-{
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename StrictWeakOrdering>
-  OutputIterator set_difference(InputIterator1 first1,
-                                InputIterator1 last1,
-                                InputIterator2 first2,
-                                InputIterator2 last2,
-                                OutputIterator result,
-                                StrictWeakOrdering comp)
-{
-  // wrap comp
-  thrust::detail::host_function<
-    StrictWeakOrdering,
-    bool
-  > wrapped_comp(comp);
-
-  while(first1 != last1 && first2 != last2)
-  {
-    if(wrapped_comp(*first1,*first2))
-    {
-      *result = *first1;
-      ++first1;
-      ++result;
-    } // end if
-    else if(wrapped_comp(*first2,*first1))
-    {
-      ++first2;
-    } // end else if
-    else
-    {
-      ++first1;
-      ++first2;
-    } // end else
-  } // end while
-
-  return scalar::copy(first1, last1, result);
-} // end set_difference()
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename StrictWeakOrdering>
-  OutputIterator set_intersection(InputIterator1 first1,
-                                  InputIterator1 last1,
-                                  InputIterator2 first2,
-                                  InputIterator2 last2,
-                                  OutputIterator result,
-                                  StrictWeakOrdering comp)
-{
-  // wrap comp
-  thrust::detail::host_function<
-    StrictWeakOrdering,
-    bool
-  > wrapped_comp(comp);
-
-  while(first1 != last1 && first2 != last2)
-  {
-    if(wrapped_comp(*first1,*first2))
-    {
-      ++first1;
-    } // end if
-    else if(wrapped_comp(*first2,*first1))
-    {
-      ++first2;
-    } // end else if
-    else
-    {
-      *result = *first1;
-      ++first1;
-      ++first2;
-      ++result;
-    } // end else
-  } // end while
-
-  return result;
-} // end set_intersection()
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename StrictWeakOrdering>
-  OutputIterator set_symmetric_difference(InputIterator1 first1,
-                                          InputIterator1 last1,
-                                          InputIterator2 first2,
-                                          InputIterator2 last2,
-                                          OutputIterator result,
-                                          StrictWeakOrdering comp)
-{
-  // wrap comp
-  thrust::detail::host_function<
-    StrictWeakOrdering,
-    bool
-  > wrapped_comp(comp);
-
-  while(first1 != last1 && first2 != last2)
-  {
-    if(wrapped_comp(*first1,*first2))
-    {
-      *result = *first1;
-      ++first1;
-      ++result;
-    } // end if
-    else if(wrapped_comp(*first2,*first1))
-    {
-      *result = *first2;
-      ++first2;
-      ++result;
-    } // end else if
-    else
-    {
-      ++first1;
-      ++first2;
-    } // end else
-  } // end while
-
-  return scalar::copy(first2, last2, scalar::copy(first1, last1, result));
-} // end set_symmetric_difference()
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename StrictWeakOrdering>
-  OutputIterator set_union(InputIterator1 first1,
-                           InputIterator1 last1,
-                           InputIterator2 first2,
-                           InputIterator2 last2,
-                           OutputIterator result,
-                           StrictWeakOrdering comp)
-{
-  // wrap comp
-  thrust::detail::host_function<
-    StrictWeakOrdering,
-    bool
-  > wrapped_comp(comp);
-
-  while(first1 != last1 && first2 != last2)
-  {
-    if(wrapped_comp(*first1,*first2))
-    {
-      *result = *first1;
-      ++first1;
-    } // end if
-    else if(wrapped_comp(*first2,*first1))
-    {
-      *result = *first2;
-      ++first2;
-    } // end else if
-    else
-    {
-      *result = *first1;
-      ++first1;
-      ++first2;
-    } // end else
-
-    ++result;
-  } // end while
-
-  return scalar::copy(first2, last2, scalar::copy(first1, last1, result));
-} // end set_union()
-
-} // end namespace scalar
-} // end namespace internal
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/internal/scalar/sort.h b/compat/thrust/system/detail/internal/scalar/sort.h
deleted file mode 100644
index 9e465c8ca0..0000000000
--- a/compat/thrust/system/detail/internal/scalar/sort.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-/*! \file sort.h
- *  \brief Sequential implementations of sort algorithms.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace internal
-{
-namespace scalar
-{
-
-template<typename RandomAccessIterator,
-         typename StrictWeakOrdering>
-void stable_sort(RandomAccessIterator first,
-                 RandomAccessIterator last,
-                 StrictWeakOrdering comp);
-
-template<typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename StrictWeakOrdering>
-void stable_sort_by_key(RandomAccessIterator1 first1,
-                        RandomAccessIterator1 last1,
-                        RandomAccessIterator2 first2,
-                        StrictWeakOrdering comp);
-
-} // end namespace scalar
-} // end namespace internal
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/detail/internal/scalar/sort.inl>
-
diff --git a/compat/thrust/system/detail/internal/scalar/sort.inl b/compat/thrust/system/detail/internal/scalar/sort.inl
deleted file mode 100644
index c6ed27324c..0000000000
--- a/compat/thrust/system/detail/internal/scalar/sort.inl
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#include <thrust/reverse.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/system/detail/internal/scalar/stable_merge_sort.h>
-#include <thrust/system/detail/internal/scalar/stable_primitive_sort.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace internal
-{
-namespace scalar
-{
-namespace sort_detail
-{
-
-////////////////////
-// Primitive Sort //
-////////////////////
-
-template<typename RandomAccessIterator,
-         typename StrictWeakOrdering>
-void stable_sort(RandomAccessIterator first,
-                 RandomAccessIterator last,
-                 StrictWeakOrdering comp,
-                 thrust::detail::true_type)
-{
-  thrust::system::detail::internal::scalar::stable_primitive_sort(first, last);
-        
-  // if comp is greater<T> then reverse the keys
-  typedef typename thrust::iterator_traits<RandomAccessIterator>::value_type KeyType;
-  const static bool reverse = thrust::detail::is_same<StrictWeakOrdering, typename thrust::greater<KeyType> >::value;
-
-  if (reverse)
-    thrust::reverse(first, last);
-}
-
-template<typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename StrictWeakOrdering>
-void stable_sort_by_key(RandomAccessIterator1 first1,
-                        RandomAccessIterator1 last1,
-                        RandomAccessIterator2 first2,
-                        StrictWeakOrdering comp,
-                        thrust::detail::true_type)
-{
-  // if comp is greater<T> then reverse the keys and values
-  typedef typename thrust::iterator_traits<RandomAccessIterator1>::value_type KeyType;
-  const static bool reverse = thrust::detail::is_same<StrictWeakOrdering, typename thrust::greater<KeyType> >::value;
-
-  // note, we also have to reverse the (unordered) input to preserve stability
-  if (reverse)
-  {
-    thrust::reverse(first1,  last1);
-    thrust::reverse(first2, first2 + (last1 - first1));
-  }
-
-  thrust::system::detail::internal::scalar::stable_primitive_sort_by_key(first1, last1, first2);
-
-  if (reverse)
-  {
-    thrust::reverse(first1,  last1);
-    thrust::reverse(first2, first2 + (last1 - first1));
-  }
-}
-
-////////////////
-// Merge Sort //
-////////////////
-
-template<typename RandomAccessIterator,
-         typename StrictWeakOrdering>
-void stable_sort(RandomAccessIterator first,
-                 RandomAccessIterator last,
-                 StrictWeakOrdering comp,
-                 thrust::detail::false_type)
-{
-  thrust::system::detail::internal::scalar::stable_merge_sort(first, last, comp);
-}
-
-template<typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename StrictWeakOrdering>
-void stable_sort_by_key(RandomAccessIterator1 first1,
-                        RandomAccessIterator1 last1,
-                        RandomAccessIterator2 first2,
-                        StrictWeakOrdering comp,
-                        thrust::detail::false_type)
-{
-  thrust::system::detail::internal::scalar::stable_merge_sort_by_key(first1, last1, first2, comp);
-}
-
-
-} // end namespace sort_detail
-
-template<typename RandomAccessIterator,
-         typename StrictWeakOrdering>
-void stable_sort(RandomAccessIterator first,
-                 RandomAccessIterator last,
-                 StrictWeakOrdering comp)
-{
-  typedef typename thrust::iterator_traits<RandomAccessIterator>::value_type KeyType;
-  static const bool use_primitive_sort = thrust::detail::is_arithmetic<KeyType>::value &&
-                                         (thrust::detail::is_same<StrictWeakOrdering, typename thrust::less<KeyType> >::value ||
-                                          thrust::detail::is_same<StrictWeakOrdering, typename thrust::greater<KeyType> >::value);
-
-  // supress unused variable warning
-  (void) use_primitive_sort;
-
-  thrust::system::detail::internal::scalar::sort_detail::stable_sort
-    (first, last, comp, 
-      thrust::detail::integral_constant<bool, use_primitive_sort>());
-}
-
-template<typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename StrictWeakOrdering>
-void stable_sort_by_key(RandomAccessIterator1 first1,
-                        RandomAccessIterator1 last1,
-                        RandomAccessIterator2 first2,
-                        StrictWeakOrdering comp)
-{
-  typedef typename thrust::iterator_traits<RandomAccessIterator1>::value_type KeyType;
-  static const bool use_primitive_sort = thrust::detail::is_arithmetic<KeyType>::value &&
-                                         (thrust::detail::is_same<StrictWeakOrdering, typename thrust::less<KeyType> >::value ||
-                                          thrust::detail::is_same<StrictWeakOrdering, typename thrust::greater<KeyType> >::value);
-
-  // supress unused variable warning
-  (void) use_primitive_sort;
-
-  thrust::system::detail::internal::scalar::sort_detail::stable_sort_by_key
-    (first1, last1, first2, comp, 
-      thrust::detail::integral_constant<bool, use_primitive_sort>());
-}
-
-} // end namespace scalar
-} // end namespace internal
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/internal/scalar/stable_merge_sort.h b/compat/thrust/system/detail/internal/scalar/stable_merge_sort.h
deleted file mode 100644
index f68242cc5a..0000000000
--- a/compat/thrust/system/detail/internal/scalar/stable_merge_sort.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file stable_merge_sort.h
- *  \brief Sequential implementation of merge sort.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace cpp
-{
-namespace detail
-{
-
-template<typename RandomAccessIterator,
-         typename StrictWeakOrdering>
-void stable_merge_sort(RandomAccessIterator begin,
-                       RandomAccessIterator end,
-                       StrictWeakOrdering comp);
-
-template<typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename StrictWeakOrdering>
-void stable_merge_sort_by_key(RandomAccessIterator1 keys_begin,
-                              RandomAccessIterator1 keys_end,
-                              RandomAccessIterator2 values_begin,
-                              StrictWeakOrdering comp);
-
-} // end namespace detail
-} // end namespace cpp
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/detail/internal/scalar/stable_merge_sort.inl>
-
diff --git a/compat/thrust/system/detail/internal/scalar/stable_merge_sort.inl b/compat/thrust/system/detail/internal/scalar/stable_merge_sort.inl
deleted file mode 100644
index 41d320cb08..0000000000
--- a/compat/thrust/system/detail/internal/scalar/stable_merge_sort.inl
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/detail/temporary_array.h>
-#include <thrust/system/detail/internal/scalar/merge.h>
-#include <thrust/system/detail/internal/scalar/insertion_sort.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace internal
-{
-namespace scalar
-{
-namespace detail
-{
-
-template <typename RandomAccessIterator,
-          typename StrictWeakOrdering>
-void inplace_merge(RandomAccessIterator first,
-                   RandomAccessIterator middle,
-                   RandomAccessIterator last,
-                   StrictWeakOrdering comp)
-{
-  // XXX the type of exec should be:
-  //     typedef decltype(select_system(first, middle, last)) DerivedPolicy;
-  typedef typename thrust::iterator_system<RandomAccessIterator>::type DerivedPolicy;
-  typedef typename thrust::iterator_value<RandomAccessIterator>::type value_type;
-
-  // XXX assumes DerivedPolicy is default constructible
-  // XXX find a way to get a stateful execution policy into this function
-  //     or simply pass scratch space
-  DerivedPolicy exec;
-  thrust::detail::temporary_array<value_type, DerivedPolicy> a(exec, first, middle);
-  thrust::detail::temporary_array<value_type, DerivedPolicy> b(exec, middle, last);
-
-  thrust::system::detail::internal::scalar::merge(a.begin(), a.end(), b.begin(), b.end(), first, comp);
-}
-
-template <typename RandomAccessIterator1,
-          typename RandomAccessIterator2,
-          typename StrictWeakOrdering>
-void inplace_merge_by_key(RandomAccessIterator1 first1,
-                          RandomAccessIterator1 middle1,
-                          RandomAccessIterator1 last1,
-                          RandomAccessIterator2 first2,
-                          StrictWeakOrdering comp)
-{
-  // XXX the type of exec should be:
-  //     typedef decltype(select_system(first1, middle1, last1, first2)) DerivedPolicy;
-  typedef typename thrust::iterator_system<RandomAccessIterator1>::type DerivedPolicy;
-  typedef typename thrust::iterator_value<RandomAccessIterator1>::type value_type1;
-  typedef typename thrust::iterator_value<RandomAccessIterator2>::type value_type2;
-
-  RandomAccessIterator2 middle2 = first2 + (middle1 - first1);
-  RandomAccessIterator2 last2   = first2 + (last1   - first1);
-
-  // XXX assumes DerivedPolicy is default constructible
-  // XXX find a way to get a stateful exec into this function
-  //     or simply pass scratch space
-  DerivedPolicy exec;
-  thrust::detail::temporary_array<value_type1, DerivedPolicy> lhs1(exec, first1, middle1);
-  thrust::detail::temporary_array<value_type1, DerivedPolicy> rhs1(exec, middle1, last1);
-  thrust::detail::temporary_array<value_type2, DerivedPolicy> lhs2(exec, first2, middle2);
-  thrust::detail::temporary_array<value_type2, DerivedPolicy> rhs2(exec, middle2, last2);
-
-  thrust::system::detail::internal::scalar::merge_by_key
-    (lhs1.begin(), lhs1.end(), rhs1.begin(), rhs1.end(),
-     lhs2.begin(), rhs2.begin(),
-     first1, first2, comp);
-}
-
-} // end namespace detail
-
-//////////////
-// Key Sort //
-//////////////
-
-template <typename RandomAccessIterator,
-          typename StrictWeakOrdering>
-void stable_merge_sort(RandomAccessIterator first,
-                       RandomAccessIterator last,
-                       StrictWeakOrdering comp)
-{
-  if (last - first < 32)
-  {
-    thrust::system::detail::internal::scalar::insertion_sort(first, last, comp);
-  }
-  else
-  {
-    RandomAccessIterator middle = first + (last - first) / 2;
-
-    thrust::system::detail::internal::scalar::stable_merge_sort(first, middle, comp);
-    thrust::system::detail::internal::scalar::stable_merge_sort(middle,  last, comp);
-    detail::inplace_merge(first, middle, last, comp);
-  }
-}
-
-
-////////////////////
-// Key-Value Sort //
-////////////////////
-
-template <typename RandomAccessIterator1,
-          typename RandomAccessIterator2,
-          typename StrictWeakOrdering>
-void stable_merge_sort_by_key(RandomAccessIterator1 first1,
-                              RandomAccessIterator1 last1,
-                              RandomAccessIterator2 first2,
-                              StrictWeakOrdering comp)
-{
-  if (last1 - first1 <= 32)
-  {
-    thrust::system::detail::internal::scalar::insertion_sort_by_key(first1, last1, first2, comp);
-  }
-  else
-  {
-    RandomAccessIterator1 middle1 = first1 + (last1 - first1) / 2;
-    RandomAccessIterator2 middle2 = first2 + (last1 - first1) / 2;
-
-    thrust::system::detail::internal::scalar::stable_merge_sort_by_key(first1, middle1, first2,  comp);
-    thrust::system::detail::internal::scalar::stable_merge_sort_by_key(middle1,  last1, middle2, comp);
-    detail::inplace_merge_by_key(first1, middle1, last1, first2, comp);
-  }
-}
-
-} // end namespace scalar
-} // end namespace internal
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/internal/scalar/stable_primitive_sort.h b/compat/thrust/system/detail/internal/scalar/stable_primitive_sort.h
deleted file mode 100644
index f37bf27cd1..0000000000
--- a/compat/thrust/system/detail/internal/scalar/stable_primitive_sort.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace internal
-{
-namespace scalar
-{
-
-template<typename RandomAccessIterator>
-void stable_primitive_sort(RandomAccessIterator first,
-                           RandomAccessIterator last);
-
-template<typename RandomAccessIterator1,
-         typename RandomAccessIterator2>
-void stable_primitive_sort_by_key(RandomAccessIterator1 keys_first,
-                                  RandomAccessIterator1 keys_last,
-                                  RandomAccessIterator2 values_first);
-
-} // end namespace scalar
-} // end namespace internal
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/detail/internal/scalar/stable_primitive_sort.inl>
-
diff --git a/compat/thrust/system/detail/internal/scalar/stable_primitive_sort.inl b/compat/thrust/system/detail/internal/scalar/stable_primitive_sort.inl
deleted file mode 100644
index c22b15c0e4..0000000000
--- a/compat/thrust/system/detail/internal/scalar/stable_primitive_sort.inl
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/internal/scalar/stable_primitive_sort.h>
-#include <thrust/system/detail/internal/scalar/stable_radix_sort.h>
-#include <thrust/functional.h>
-#include <thrust/system/detail/internal/scalar/partition.h>
-#include <thrust/iterator/zip_iterator.h>
-#include <thrust/detail/type_traits.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace internal
-{
-namespace scalar
-{
-namespace stable_primitive_sort_detail
-{
-
-
-template<typename Iterator>
-  struct enable_if_bool_sort
-    : thrust::detail::enable_if<
-        thrust::detail::is_same<
-          bool,
-          typename thrust::iterator_value<Iterator>::type
-        >::value
-      >
-{};
-
-
-template<typename Iterator>
-  struct disable_if_bool_sort
-    : thrust::detail::disable_if<
-        thrust::detail::is_same<
-          bool,
-          typename thrust::iterator_value<Iterator>::type
-        >::value
-      >
-{};
-
-
-
-template<typename RandomAccessIterator>
-  typename enable_if_bool_sort<RandomAccessIterator>::type
-    stable_primitive_sort(RandomAccessIterator first, RandomAccessIterator last)
-{
-  // use stable_partition if we're sorting bool
-  // stable_partition puts true values first, so we need to logical_not
-  scalar::stable_partition(first, last, thrust::logical_not<bool>());
-}
-
-
-template<typename RandomAccessIterator>
-  typename disable_if_bool_sort<RandomAccessIterator>::type
-    stable_primitive_sort(RandomAccessIterator first, RandomAccessIterator last)
-{
-  // call stable_radix_sort
-  scalar::stable_radix_sort(first,last);
-}
-
-
-struct logical_not_first
-{
-  template<typename Tuple>
-  __host__ __device__
-  bool operator()(Tuple t)
-  {
-    return !thrust::get<0>(t);
-  }
-};
-
-
-template<typename RandomAccessIterator1, typename RandomAccessIterator2>
-  typename enable_if_bool_sort<RandomAccessIterator1>::type
-    stable_primitive_sort_by_key(RandomAccessIterator1 keys_first, RandomAccessIterator1 keys_last,
-                                 RandomAccessIterator2 values_first)
-{
-  // use stable_partition if we're sorting bool
-  // stable_partition puts true values first, so we need to logical_not
-  scalar::stable_partition(thrust::make_zip_iterator(thrust::make_tuple(keys_first, values_first)),
-                           thrust::make_zip_iterator(thrust::make_tuple(keys_last, values_first)),
-                           logical_not_first());
-}
-
-
-template<typename RandomAccessIterator1, typename RandomAccessIterator2>
-  typename disable_if_bool_sort<RandomAccessIterator1>::type
-    stable_primitive_sort_by_key(RandomAccessIterator1 keys_first, RandomAccessIterator1 keys_last,
-                                 RandomAccessIterator2 values_first)
-{
-  // call stable_radix_sort_by_key
-  scalar::stable_radix_sort_by_key(keys_first, keys_last, values_first);
-}
-
-
-}
-
-template<typename RandomAccessIterator>
-void stable_primitive_sort(RandomAccessIterator first,
-                           RandomAccessIterator last)
-{
-  scalar::stable_primitive_sort_detail::stable_primitive_sort(first,last);
-}
-
-template<typename RandomAccessIterator1,
-         typename RandomAccessIterator2>
-void stable_primitive_sort_by_key(RandomAccessIterator1 keys_first,
-                                  RandomAccessIterator1 keys_last,
-                                  RandomAccessIterator2 values_first)
-{
-  scalar::stable_primitive_sort_detail::stable_primitive_sort_by_key(keys_first, keys_last, values_first);
-}
-
-} // end namespace scalar
-} // end namespace internal
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/detail/internal/scalar/stable_primitive_sort.inl>
-
diff --git a/compat/thrust/system/detail/internal/scalar/stable_radix_sort.h b/compat/thrust/system/detail/internal/scalar/stable_radix_sort.h
deleted file mode 100644
index f2af22263a..0000000000
--- a/compat/thrust/system/detail/internal/scalar/stable_radix_sort.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file stable_radix_sort.h
- *  \brief Sequential implementation of radix sort.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace internal
-{
-namespace scalar
-{
-
-template<typename RandomAccessIterator>
-void stable_radix_sort(RandomAccessIterator begin,
-                       RandomAccessIterator end);
-
-template<typename RandomAccessIterator1,
-         typename RandomAccessIterator2>
-void stable_radix_sort_by_key(RandomAccessIterator1 keys_begin,
-                              RandomAccessIterator1 keys_end,
-                              RandomAccessIterator2 values_begin);
-
-} // end namespace scalar
-} // end namespace internal
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/detail/internal/scalar/stable_radix_sort.inl>
-
diff --git a/compat/thrust/system/detail/internal/scalar/stable_radix_sort.inl b/compat/thrust/system/detail/internal/scalar/stable_radix_sort.inl
deleted file mode 100644
index 98846ab101..0000000000
--- a/compat/thrust/system/detail/internal/scalar/stable_radix_sort.inl
+++ /dev/null
@@ -1,434 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#include <limits>
-
-#include <thrust/copy.h>
-#include <thrust/functional.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/detail/temporary_array.h>
-#include <thrust/detail/cstdint.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace internal
-{
-namespace scalar
-{
-namespace detail
-{
-
-template <typename T>
-struct RadixEncoder : public thrust::identity<T>
-{};
-
-template <>
-struct RadixEncoder<char> : public thrust::unary_function<char, unsigned char>
-{
-  unsigned char operator()(char x) const
-  {
-    if(std::numeric_limits<char>::is_signed)
-      return x ^ static_cast<unsigned char>(1) << (8 * sizeof(unsigned char) - 1);
-    else
-      return x;
-  }
-};
-
-template <>
-struct RadixEncoder<signed char> : public thrust::unary_function<signed char, unsigned char>
-{
-  unsigned char operator()(signed char x) const
-  {
-    return x ^ static_cast<unsigned char>(1) << (8 * sizeof(unsigned char) - 1);
-  }
-};
-
-template <>
-struct RadixEncoder<short> : public thrust::unary_function<short, unsigned short>
-{
-  unsigned short operator()(short x) const
-  {
-    return x ^ static_cast<unsigned short>(1) << (8 * sizeof(unsigned short) - 1);
-  }
-};
-
-template <>
-struct RadixEncoder<int> : public thrust::unary_function<int, unsigned int>
-{
-  unsigned long operator()(long x) const
-  {
-    return x ^ static_cast<unsigned int>(1) << (8 * sizeof(unsigned int) - 1);
-  }
-};
-
-template <>
-struct RadixEncoder<long> : public thrust::unary_function<long, unsigned long>
-{
-  unsigned long operator()(long x) const
-  {
-    return x ^ static_cast<unsigned long>(1) << (8 * sizeof(unsigned long) - 1);
-  }
-};
-
-template <>
-struct RadixEncoder<long long> : public thrust::unary_function<long long, unsigned long long>
-{
-  unsigned long long operator()(long long x) const
-  {
-    return x ^ static_cast<unsigned long long>(1) << (8 * sizeof(unsigned long long) - 1);
-  }
-};
-
-// ideally we'd use uint32 here and uint64 below
-template <>
-struct RadixEncoder<float> : public thrust::unary_function<float, thrust::detail::uint32_t>
-{
-  thrust::detail::uint32_t operator()(float x) const
-  {
-    union { float f; thrust::detail::uint32_t i; } u;
-    u.f = x;
-    thrust::detail::uint32_t mask = -static_cast<thrust::detail::int32_t>(u.i >> 31) | (static_cast<thrust::detail::uint32_t>(1) << 31);
-    return u.i ^ mask;
-  }
-};
-
-template <>
-struct RadixEncoder<double> : public thrust::unary_function<double, thrust::detail::uint64_t>
-{
-  thrust::detail::uint64_t operator()(double x) const
-  {
-    union { double f; thrust::detail::uint64_t i; } u;
-    u.f = x;
-    thrust::detail::uint64_t mask = -static_cast<thrust::detail::int64_t>(u.i >> 63) | (static_cast<thrust::detail::uint64_t>(1) << 63);
-    return u.i ^ mask;
-  }
-};
-
-
-template <unsigned int RadixBits,
-          bool HasValues,
-          typename RandomAccessIterator1,
-          typename RandomAccessIterator2,
-          typename RandomAccessIterator3,
-          typename RandomAccessIterator4>
-void radix_sort(RandomAccessIterator1 keys1,
-                RandomAccessIterator2 keys2,
-                RandomAccessIterator3 vals1,
-                RandomAccessIterator4 vals2,
-                const size_t N)
-{
-  typedef typename thrust::iterator_value<RandomAccessIterator1>::type KeyType;
-
-  typedef RadixEncoder<KeyType> Encoder;
-  typedef typename Encoder::result_type EncodedType;
-
-  static const unsigned int NumHistograms = (8 * sizeof(EncodedType) + (RadixBits - 1)) / RadixBits;
-  static const unsigned int HistogramSize =  1 << RadixBits;
-
-  static const EncodedType BitMask = static_cast<EncodedType>((1 << RadixBits) - 1);
-  
-  Encoder encode;
-
-  // storage for histograms
-  size_t histograms[NumHistograms][HistogramSize] = {{0}};
-
-  // see which passes can be eliminated
-  bool skip_shuffle[NumHistograms] = {false};
-  
-  // false if most recent data is stored in (keys1,vals1)
-  bool flip = false;
-    
-  // compute histograms
-  for (size_t i = 0; i < N; i++)
-  {
-    const EncodedType x = encode(keys1[i]);
-
-    for (unsigned int j = 0; j < NumHistograms; j++)
-    {
-      const EncodedType BitShift = RadixBits * j;
-      histograms[j][(x >> BitShift) & BitMask]++;
-    }
-  }
-
-  // scan histograms
-  for (unsigned int i = 0; i < NumHistograms; i++)
-  {
-    size_t sum = 0;
-
-    for (unsigned int j = 0; j < HistogramSize; j++)
-    {
-      size_t bin = histograms[i][j];
-
-      if (bin == N)
-        skip_shuffle[i] = true;
-
-      histograms[i][j] = sum;
-
-      sum = sum + bin;
-    }
-  }
-
-  // shuffle keys and (optionally) values 
-  for (unsigned int i = 0; i < NumHistograms; i++)
-  {
-    const EncodedType BitShift = static_cast<EncodedType>(RadixBits * i);
-
-    if (!skip_shuffle[i])
-    {
-      if (flip)
-      {
-        for (size_t j = 0; j < N; j++)
-        {
-          const EncodedType x = encode(keys2[j]);
-          size_t position = histograms[i][(x >> BitShift) & BitMask]++;
-
-          RandomAccessIterator1 temp_keys1 = keys1;
-          temp_keys1 += position;
-
-          RandomAccessIterator2 temp_keys2 = keys2;
-          temp_keys2 += j;
-
-          // keys1[position] = keys2[j]
-          *temp_keys1 = *temp_keys2;
-
-          if (HasValues)
-          {
-            RandomAccessIterator3 temp_vals1 = vals1;
-            temp_vals1 += position;
-
-            RandomAccessIterator4 temp_vals2 = vals2;
-            temp_vals2 += j;
-
-            // vals1[position] = vals2[j]
-            *temp_vals1 = *temp_vals2;
-          }
-        }
-      }
-      else
-      {
-        for (size_t j = 0; j < N; j++)
-        {
-          const EncodedType x = encode(keys1[j]);
-          size_t position = histograms[i][(x >> BitShift) & BitMask]++;
-
-          RandomAccessIterator1 temp_keys1 = keys1;
-          temp_keys1 += j;
-
-          RandomAccessIterator2 temp_keys2 = keys2;
-          temp_keys2 += position;
-
-          // keys2[position] = keys1[j];
-          *temp_keys2 = *temp_keys1;
-
-          if (HasValues)
-          {
-            RandomAccessIterator3 temp_vals1 = vals1;
-            temp_vals1 += j;
-
-            RandomAccessIterator4 temp_vals2 = vals2;
-            temp_vals2 += position;
-
-            // vals2[position] = vals1[j]
-            *temp_vals2 = *temp_vals1;
-          }
-        }
-      }
-        
-      flip = (flip) ? false : true;
-    }
-  }
- 
-  // ensure final values are in (keys1,vals1)
-  if (flip)
-  {
-    thrust::copy(keys2, keys2 + N, keys1);
-    if (HasValues)
-      thrust::copy(vals2, vals2 + N, vals1);
-  }
-}
-
-
-// Select best radix sort parameters based on sizeof(T) and input size
-// These particular values were determined through empirical testing on a Core i7 950 CPU
-template <size_t KeySize>
-struct radix_sort_dispatcher
-{
-};
-
-template <>
-struct radix_sort_dispatcher<1>
-{
-  template <typename RandomAccessIterator1, typename RandomAccessIterator2>
-  void operator()(RandomAccessIterator1 keys1, RandomAccessIterator2 keys2, const size_t N)
-  {
-    detail::radix_sort<8,false>(keys1, keys2, static_cast<int *>(0), static_cast<int *>(0), N);
-  }
-  template <typename RandomAccessIterator1, typename RandomAccessIterator2, typename RandomAccessIterator3, typename RandomAccessIterator4>
-  void operator()(RandomAccessIterator1 keys1, RandomAccessIterator2 keys2, RandomAccessIterator3 vals1, RandomAccessIterator4 vals2, const size_t N)
-  {
-    detail::radix_sort<8,true>(keys1, keys2, vals1, vals2, N);
-  }
-};
-
-template <>
-struct radix_sort_dispatcher<2>
-{
-  template <typename RandomAccessIterator1, typename RandomAccessIterator2>
-  void operator()(RandomAccessIterator1 keys1, RandomAccessIterator2 keys2, const size_t N)
-  {
-    if (N < (1 << 16))
-      detail::radix_sort<8,false>(keys1, keys2, static_cast<int *>(0), static_cast<int *>(0), N);
-    else
-      detail::radix_sort<16,false>(keys1, keys2, static_cast<int *>(0), static_cast<int *>(0), N);
-  }
-  template <typename RandomAccessIterator1, typename RandomAccessIterator2, typename RandomAccessIterator3, typename RandomAccessIterator4>
-  void operator()(RandomAccessIterator1 keys1, RandomAccessIterator2 keys2, RandomAccessIterator3 vals1, RandomAccessIterator4 vals2, const size_t N)
-  {
-    if (N < (1 << 15))
-      detail::radix_sort<8,true>(keys1, keys2, vals1, vals2, N);
-    else
-      detail::radix_sort<16,true>(keys1, keys2, vals1, vals2, N);
-  }
-};
-
-template <>
-struct radix_sort_dispatcher<4>
-{
-  template <typename RandomAccessIterator1, typename RandomAccessIterator2>
-  void operator()(RandomAccessIterator1 keys1, RandomAccessIterator2 keys2, const size_t N)
-  {
-    if (N < (1 << 22))
-      detail::radix_sort<8,false>(keys1, keys2, static_cast<int *>(0), static_cast<int *>(0), N);
-    else
-      detail::radix_sort<4,false>(keys1, keys2, static_cast<int *>(0), static_cast<int *>(0), N);
-  }
-  template <typename RandomAccessIterator1, typename RandomAccessIterator2, typename RandomAccessIterator3, typename RandomAccessIterator4>
-  void operator()(RandomAccessIterator1 keys1, RandomAccessIterator2 keys2, RandomAccessIterator3 vals1, RandomAccessIterator4 vals2, const size_t N)
-  {
-    if (N < (1 << 22))
-      detail::radix_sort<8,true>(keys1, keys2, vals1, vals2, N);
-    else
-      detail::radix_sort<3,true>(keys1, keys2, vals1, vals2, N);
-  }
-};
-
-template <>
-struct radix_sort_dispatcher<8>
-{
-  template <typename RandomAccessIterator1, typename RandomAccessIterator2>
-  void operator()(RandomAccessIterator1 keys1, RandomAccessIterator2 keys2, const size_t N)
-  {
-    if (N < (1 << 21))
-      detail::radix_sort<8,false>(keys1, keys2, static_cast<int *>(0), static_cast<int *>(0), N);
-    else
-      detail::radix_sort<4,false>(keys1, keys2, static_cast<int *>(0), static_cast<int *>(0), N);
-  }
-  template <typename RandomAccessIterator1, typename RandomAccessIterator2, typename RandomAccessIterator3, typename RandomAccessIterator4>
-  void operator()(RandomAccessIterator1 keys1, RandomAccessIterator2 keys2, RandomAccessIterator3 vals1, RandomAccessIterator4 vals2, const size_t N)
-  {
-    if (N < (1 << 21))
-      detail::radix_sort<8,true>(keys1, keys2, vals1, vals2, N);
-    else
-      detail::radix_sort<3,true>(keys1, keys2, vals1, vals2, N);
-  }
-};
-
-template <typename RandomAccessIterator1,
-          typename RandomAccessIterator2>
-void radix_sort(RandomAccessIterator1 keys1,
-                RandomAccessIterator2 keys2,
-                const size_t N)
-{
-  typedef typename thrust::iterator_value<RandomAccessIterator1>::type KeyType;
-  radix_sort_dispatcher<sizeof(KeyType)>()(keys1, keys2, N);
-}
-
-template <typename RandomAccessIterator1,
-          typename RandomAccessIterator2,
-          typename RandomAccessIterator3,
-          typename RandomAccessIterator4>
-void radix_sort(RandomAccessIterator1 keys1,
-                RandomAccessIterator2 keys2,
-                RandomAccessIterator3 vals1,
-                RandomAccessIterator4 vals2,
-                const size_t N)
-{
-  typedef typename thrust::iterator_value<RandomAccessIterator1>::type KeyType;
-  radix_sort_dispatcher<sizeof(KeyType)>()(keys1, keys2, vals1, vals2, N);
-}
-
-} // namespace detail
-
-//////////////
-// Key Sort //
-//////////////
-
-template <typename RandomAccessIterator>
-void stable_radix_sort(RandomAccessIterator first,
-                       RandomAccessIterator last)
-{
-  typedef typename thrust::iterator_system<RandomAccessIterator>::type ExecutionPolicy;
-  typedef typename thrust::iterator_value<RandomAccessIterator>::type KeyType;
-
-  size_t N = last - first;
-  
-  // XXX assumes ExecutionPolicy is default constructible
-  // XXX consider how to get stateful systems into this function
-  ExecutionPolicy exec;
-  thrust::detail::temporary_array<KeyType, ExecutionPolicy> temp(exec, N);
-  
-  detail::radix_sort(first, temp.begin(), N);
-}
-
-
-////////////////////
-// Key-Value Sort //
-////////////////////
-
-template <typename RandomAccessIterator1,
-          typename RandomAccessIterator2>
-void stable_radix_sort_by_key(RandomAccessIterator1 first1,
-                              RandomAccessIterator1 last1,
-                              RandomAccessIterator2 first2)
-{
-  // XXX the type of exec should be
-  //     typedef decltype(select_system(first1,last1,first2)) system;
-  typedef typename thrust::iterator_system<RandomAccessIterator1>::type ExecutionPolicy;
-  typedef typename thrust::iterator_value<RandomAccessIterator1>::type KeyType;
-  typedef typename thrust::iterator_value<RandomAccessIterator2>::type ValueType;
-
-  size_t N = last1 - first1;
-  
-  // XXX assumes ExecutionPolicy is default constructible
-  // XXX consider how to get stateful systems into this function
-  ExecutionPolicy exec;
-  thrust::detail::temporary_array<KeyType, ExecutionPolicy>   temp1(exec, N);
-  thrust::detail::temporary_array<ValueType, ExecutionPolicy> temp2(exec, N);
-
-  detail::radix_sort(first1, temp1.begin(), first2, temp2.begin(), N);
-}
-
-} // end namespace scalar
-} // end namespace internal
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/internal/scalar/trivial_copy.h b/compat/thrust/system/detail/internal/scalar/trivial_copy.h
deleted file mode 100644
index 8f008b54e4..0000000000
--- a/compat/thrust/system/detail/internal/scalar/trivial_copy.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-/*! \file trivial_copy.h
- *  \brief Sequential copy algorithms for plain-old-data.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <cstring>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace internal
-{
-namespace scalar
-{
-
-template<typename T>
-  T *trivial_copy_n(const T *first,
-                    std::ptrdiff_t n,
-                    T *result)
-{
-  std::memmove(result, first, n * sizeof(T));
-  return result + n;
-} // end trivial_copy_n()
-
-} // end namespace scalar
-} // end namespace internal
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/internal/scalar/unique.h b/compat/thrust/system/detail/internal/scalar/unique.h
deleted file mode 100644
index cfc60c948a..0000000000
--- a/compat/thrust/system/detail/internal/scalar/unique.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file unique.h
- *  \brief Sequential implementations of unique algorithms.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/pair.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace internal
-{
-namespace scalar
-{
-
-template<typename InputIterator,
-         typename OutputIterator,
-         typename BinaryPredicate>
-  OutputIterator unique_copy(InputIterator first,
-                             InputIterator last,
-                             OutputIterator output,
-                             BinaryPredicate binary_pred)
-{
-  typedef typename thrust::iterator_traits<InputIterator>::value_type T;
-
-  if(first != last)
-  {
-    T prev = *first;
-
-    for(++first; first != last; ++first)
-    {
-      T temp = *first;
-
-      if (!binary_pred(prev, temp))
-      {
-        *output = prev;
-
-        ++output;
-
-        prev = temp;
-      }
-    }
-
-    *output = prev;
-    ++output;
-  }
-
-  return output;
-} // end unique_copy()
-
-
-template<typename ForwardIterator,
-         typename BinaryPredicate>
-  ForwardIterator unique(ForwardIterator first,
-                         ForwardIterator last,
-                         BinaryPredicate binary_pred)
-{
-  // unique_copy() permits in-situ operation
-  return thrust::system::detail::internal::scalar::unique_copy(first, last, first, binary_pred);
-} // end unique()
-
-} // end namespace scalar
-} // end namespace internal
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/internal/scalar/unique_by_key.h b/compat/thrust/system/detail/internal/scalar/unique_by_key.h
deleted file mode 100644
index b0be2663e7..0000000000
--- a/compat/thrust/system/detail/internal/scalar/unique_by_key.h
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file unique_by_key.h
- *  \brief Sequential implementations of unique_by_key algorithms.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/pair.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace detail
-{
-namespace internal
-{
-namespace scalar
-{
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename BinaryPredicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    unique_by_key_copy(InputIterator1 keys_first, 
-                       InputIterator1 keys_last,
-                       InputIterator2 values_first,
-                       OutputIterator1 keys_output,
-                       OutputIterator2 values_output,
-                       BinaryPredicate binary_pred)
-{
-  typedef typename thrust::iterator_traits<InputIterator1>::value_type  InputKeyType;
-  typedef typename thrust::iterator_traits<OutputIterator2>::value_type OutputValueType;
-
-  if(keys_first != keys_last)
-  {
-    InputKeyType    temp_key   = *keys_first;
-    OutputValueType temp_value = *values_first;
-
-    for(++keys_first, ++values_first;
-        keys_first != keys_last;
-        ++keys_first, ++values_first)
-    {
-      InputKeyType    key   = *keys_first;
-      OutputValueType value = *values_first;
-
-      if(!binary_pred(temp_key, key))
-      {
-        *keys_output   = temp_key;
-        *values_output = temp_value;
-
-        ++keys_output;
-        ++values_output;
-
-        temp_key   = key;
-        temp_value = value;
-      }
-    }
-
-    *keys_output   = temp_key;
-    *values_output = temp_value;
-
-    ++keys_output;
-    ++values_output;
-  }
-
-  return thrust::make_pair(keys_output, values_output);
-} // end unique_by_key_copy()
-
-
-template<typename ForwardIterator1,
-         typename ForwardIterator2,
-         typename BinaryPredicate>
-  thrust::pair<ForwardIterator1,ForwardIterator2>
-    unique_by_key(ForwardIterator1 keys_first, 
-                  ForwardIterator1 keys_last,
-                  ForwardIterator2 values_first,
-                  BinaryPredicate binary_pred)
-{
-  // unique_by_key_copy() permits in-situ operation
-  return thrust::system::detail::internal::scalar::unique_by_key_copy(keys_first, keys_last, values_first, keys_first, values_first, binary_pred);
-} // end unique_by_key()
-
-} // end namespace scalar
-} // end namespace internal
-} // end namespace detail
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/detail/system_error.inl b/compat/thrust/system/detail/system_error.inl
deleted file mode 100644
index 74909bee8a..0000000000
--- a/compat/thrust/system/detail/system_error.inl
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#pragma once
-
-#include <thrust/system/system_error.h>
-
-namespace thrust
-{
-
-namespace system
-{
-
-
-system_error
-  ::system_error(error_code ec, const std::string &what_arg)
-    : std::runtime_error(what_arg), m_error_code(ec)
-{
-
-} // end system_error::system_error()
-
-
-system_error
-  ::system_error(error_code ec, const char *what_arg)
-    : std::runtime_error(what_arg), m_error_code(ec)
-{
-  ;
-} // end system_error::system_error()
-
-
-system_error
-  ::system_error(error_code ec)
-    : std::runtime_error(""), m_error_code(ec)
-{
-  ;
-} // end system_error::system_error()
-
-
-system_error
-  ::system_error(int ev, const error_category &ecat, const std::string &what_arg)
-    : std::runtime_error(what_arg), m_error_code(ev,ecat)
-{
-  ;
-} // end system_error::system_error()
-
-
-system_error
-  ::system_error(int ev, const error_category &ecat, const char *what_arg)
-    : std::runtime_error(what_arg), m_error_code(ev,ecat)
-{
-  ;
-} // end system_error::system_error()
-
-
-system_error
-  ::system_error(int ev, const error_category &ecat)
-    : std::runtime_error(""), m_error_code(ev,ecat)
-{
-  ;
-} // end system_error::system_error()
-
-
-const error_code &system_error
-  ::code(void) const throw()
-{
-  return m_error_code;
-} // end system_error::code()
-
-
-const char *system_error
-  ::what(void) const throw()
-{
-  if(m_what.empty())
-  {
-    try
-    {
-      m_what = this->std::runtime_error::what();
-      if(m_error_code)
-      {
-        if(!m_what.empty()) m_what += ": ";
-        m_what += m_error_code.message();
-      }
-    }
-    catch(...)
-    {
-      return std::runtime_error::what();
-    }
-  }
-
-  return m_what.c_str();
-} // end system_error::what()
-
-
-} // end system
-
-} // end thrust
-
diff --git a/compat/thrust/system/error_code.h b/compat/thrust/system/error_code.h
deleted file mode 100644
index 2b6582c937..0000000000
--- a/compat/thrust/system/error_code.h
+++ /dev/null
@@ -1,521 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file error_code.h
- *  \brief An object used to hold error values, such as those originating from the
- *         operating system or other low-level application program interfaces.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/system/detail/errno.h>
-#include <iostream>
-
-namespace thrust
-{
-
-namespace system
-{
-
-
-/*! \addtogroup system_diagnostics
- *  \{
- */
-
-class error_condition;
-class error_code;
-
-/*! A metafunction returning whether or not the parameter is an \p error_code enum.
- */
-template<typename T> struct is_error_code_enum : public thrust::detail::false_type {};
-
-/*! A metafunction returning whether or not the parameter is an \p error_condition enum.
- */
-template<typename T> struct is_error_condition_enum : public thrust::detail::false_type {};
-
-
-// XXX N3092 prefers enum class errc { ... }
-namespace errc
-{
-
-enum errc_t
-{
-  address_family_not_supported       = detail::eafnosupport,
-  address_in_use                     = detail::eaddrinuse,
-  address_not_available              = detail::eaddrnotavail,
-  already_connected                  = detail::eisconn,
-  argument_list_too_long             = detail::e2big,
-  argument_out_of_domain             = detail::edom,
-  bad_address                        = detail::efault,
-  bad_file_descriptor                = detail::ebadf,
-  bad_message                        = detail::ebadmsg,
-  broken_pipe                        = detail::epipe,
-  connection_aborted                 = detail::econnaborted,
-  connection_already_in_progress     = detail::ealready,
-  connection_refused                 = detail::econnrefused,
-  connection_reset                   = detail::econnreset,
-  cross_device_link                  = detail::exdev,
-  destination_address_required       = detail::edestaddrreq,
-  device_or_resource_busy            = detail::ebusy,
-  directory_not_empty                = detail::enotempty,
-  executable_format_error            = detail::enoexec,
-  file_exists                        = detail::eexist,
-  file_too_large                     = detail::efbig,
-  filename_too_long                  = detail::enametoolong,
-  function_not_supported             = detail::enosys,
-  host_unreachable                   = detail::ehostunreach,
-  identifier_removed                 = detail::eidrm,
-  illegal_byte_sequence              = detail::eilseq,
-  inappropriate_io_control_operation = detail::enotty,
-  interrupted                        = detail::eintr,
-  invalid_argument                   = detail::einval,
-  invalid_seek                       = detail::espipe,
-  io_error                           = detail::eio,
-  is_a_directory                     = detail::eisdir,
-  message_size                       = detail::emsgsize,
-  network_down                       = detail::enetdown,
-  network_reset                      = detail::enetreset,
-  network_unreachable                = detail::enetunreach,
-  no_buffer_space                    = detail::enobufs,
-  no_child_process                   = detail::echild,
-  no_link                            = detail::enolink,
-  no_lock_available                  = detail::enolck,
-  no_message_available               = detail::enodata,
-  no_message                         = detail::enomsg,
-  no_protocol_option                 = detail::enoprotoopt,
-  no_space_on_device                 = detail::enospc,
-  no_stream_resources                = detail::enosr,
-  no_such_device_or_address          = detail::enxio,
-  no_such_device                     = detail::enodev,
-  no_such_file_or_directory          = detail::enoent,
-  no_such_process                    = detail::esrch,
-  not_a_directory                    = detail::enotdir,
-  not_a_socket                       = detail::enotsock,
-  not_a_stream                       = detail::enostr,
-  not_connected                      = detail::enotconn,
-  not_enough_memory                  = detail::enomem,
-  not_supported                      = detail::enotsup,
-  operation_canceled                 = detail::ecanceled,
-  operation_in_progress              = detail::einprogress,
-  operation_not_permitted            = detail::eperm,
-  operation_not_supported            = detail::eopnotsupp,
-  operation_would_block              = detail::ewouldblock,
-  owner_dead                         = detail::eownerdead,
-  permission_denied                  = detail::eacces,
-  protocol_error                     = detail::eproto,
-  protocol_not_supported             = detail::eprotonosupport,
-  read_only_file_system              = detail::erofs,
-  resource_deadlock_would_occur      = detail::edeadlk,
-  resource_unavailable_try_again     = detail::eagain,
-  result_out_of_range                = detail::erange,
-  state_not_recoverable              = detail::enotrecoverable,
-  stream_timeout                     = detail::etime,
-  text_file_busy                     = detail::etxtbsy,
-  timed_out                          = detail::etimedout,
-  too_many_files_open_in_system      = detail::enfile,
-  too_many_files_open                = detail::emfile,
-  too_many_links                     = detail::emlink,
-  too_many_symbolic_link_levels      = detail::eloop,
-  value_too_large                    = detail::eoverflow,
-  wrong_protocol_type                = detail::eprototype
-}; // end errc_t
-
-} // end namespace errc
-
-
-/*! Specialization of \p is_error_condition_enum for \p errc::errc_t
- */
-template<> struct is_error_condition_enum<errc::errc_t> : public thrust::detail::true_type {};
-
-
-// [19.5.1.1] class error_category
-
-/*! \brief The class \p error_category serves as a base class for types used to identify the
- *         source and encoding of a particular category of error code. Classes may be derived
- *         from \p error_category to support categories of errors in addition to those defined
- *         in the C++ International Standard.
- */
-class error_category
-{
-  public:
-    /*! Destructor does nothing.
-     */
-    inline virtual ~error_category(void);
-
-    // XXX enable upon c++0x
-    // error_category(const error_category &) = delete;
-    // error_category &operator=(const error_category &) = delete;
-
-    /*! \return A string naming the error category.
-     */
-    inline virtual const char *name(void) const = 0;
-
-    /*! \return \p error_condition(ev, *this).
-     */
-    inline virtual error_condition default_error_condition(int ev) const;
-
-    /*! \return <tt>default_error_condition(code) == condition</tt>
-     */
-    inline virtual bool equivalent(int code, const error_condition &condition) const;
-
-    /*! \return <tt>*this == code.category() && code.value() == condition</tt>
-     */
-    inline virtual bool equivalent(const error_code &code, int condition) const;
-
-    /*! \return A string that describes the error condition denoted by \p ev.
-     */
-    virtual std::string message(int ev) const = 0;
-
-    /*! \return <tt>*this == &rhs</tt>
-     */
-    inline bool operator==(const error_category &rhs) const;
-
-    /*! \return <tt>!(*this == rhs)</tt>
-     */
-    inline bool operator!=(const error_category &rhs) const;
-
-    /*! \return <tt>less<const error_category*>()(this, &rhs)</tt>
-     *  \note \c less provides a total ordering for pointers.
-     */
-    inline bool operator<(const error_category &rhs) const;
-}; // end error_category
-
-
-// [19.5.1.5] error_category objects
-
-
-/*! \return A reference to an object of a type derived from class \p error_category.
- *  \note The object's \p default_error_condition and \p equivalent virtual functions
- *        shall behave as specified for the class \p error_category. The object's
- *        \p name virtual function shall return a pointer to the string <tt>"generic"</tt>.
- */
-inline const error_category &generic_category(void);
-
-
-/*! \return A reference to an object of a type derived from class \p error_category.
- *  \note The object's \p equivalent virtual functions shall behave as specified for
- *        class \p error_category. The object's \p name virtual function shall return
- *        a pointer to the string <tt>"system"</tt>. The object's \p default_error_condition
- *        virtual function shall behave as follows:
- *
- *        If the argument <tt>ev</tt> corresponds to a POSIX <tt>errno</tt> value
- *        \c posv, the function shall return <tt>error_condition(ev,generic_category())</tt>.
- *        Otherwise, the function shall return <tt>error_condition(ev,system_category())</tt>.
- *        What constitutes correspondence for any given operating system is unspecified.
- */
-inline const error_category &system_category(void);
-
-
-// [19.5.2] Class error_code
-
-
-/*! \brief The class \p error_code describes an object used to hold error code values, such as
- *         those originating from the operating system or other low-level application program
- *         interfaces.
- */
-class error_code
-{
-  public:
-    // [19.5.2.2] constructors:
-
-    /*! Effects: Constructs an object of type \p error_code.
-     *  \post <tt>value() == 0</tt> and <tt>category() == &system_category()</tt>.
-     */
-    inline error_code(void);
-
-    /*! Effects: Constructs an object of type \p error_code.
-     *  \post <tt>value() == val</tt> and <tt>category() == &cat</tt>.
-     */
-    inline error_code(int val, const error_category &cat);
-
-    /*! Effects: Constructs an object of type \p error_code.
-     *  \post <tt>*this == make_error_code(e)</tt>.
-     */
-    template <typename ErrorCodeEnum>
-      error_code(ErrorCodeEnum e
-// XXX WAR msvc's problem with enable_if
-#if THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC
-        , typename thrust::detail::enable_if<is_error_code_enum<ErrorCodeEnum>::value>::type * = 0
-#endif // THRUST_HOST_COMPILER_MSVC
-        );
-
-    // [19.5.2.3] modifiers:
-
-    /*! \post <tt>value() == val</tt> and <tt>category() == &cat</tt>.
-     */
-    inline void assign(int val, const error_category &cat);
-
-    /*! \post <tt>*this == make_error_code(e)</tt>.
-     */
-    template <typename ErrorCodeEnum>
-// XXX WAR msvc's problem with enable_if
-#if THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC
-      typename thrust::detail::enable_if<is_error_code_enum<ErrorCodeEnum>::value, error_code>::type &
-#else
-      error_code &
-#endif // THRUST_HOST_COMPILER_MSVC
-        operator=(ErrorCodeEnum e);
-
-    /*! \post <tt>value() == 0</tt> and <tt>category() == system_category()</tt>.
-     */
-    inline void clear(void);
-
-    // [19.5.2.4] observers:
-
-    /*! \return An integral value of this \p error_code object.
-     */
-    inline int value(void) const;
-
-    /*! \return An \p error_category describing the category of this \p error_code object.
-     */
-    inline const error_category &category(void) const;
-
-    /*! \return <tt>category().default_error_condition()</tt>.
-     */
-    inline error_condition default_error_condition(void) const;
-
-    /*! \return <tt>category().message(value())</tt>.
-     */
-    inline std::string message(void) const;
-
-    // XXX replace the below upon c++0x
-    // inline explicit operator bool (void) const;
-
-    /*! \return <tt>value() != 0</tt>.
-     */
-    inline operator bool (void) const;
-
-    /*! \cond
-     */
-  private:
-    int m_val;
-    const error_category *m_cat;
-    /*! \endcond
-     */
-}; // end error_code
-
-
-// [19.5.2.5] Class error_code non-member functions
-
-
-// XXX replace errc::errc_t with errc upon c++0x
-/*! \return <tt>error_code(static_cast<int>(e), generic_category())</tt>
- */
-inline error_code make_error_code(errc::errc_t e);
-
-
-/*! \return <tt>lhs.category() < rhs.category() || lhs.category() == rhs.category() && lhs.value() < rhs.value()</tt>.
- */
-inline bool operator<(const error_code &lhs, const error_code &rhs);
-
-
-/*! Effects: <tt>os << ec.category().name() << ':' << ec.value()</tt>.
- */
-template <typename charT, typename traits>
-  std::basic_ostream<charT,traits>&
-    operator<<(std::basic_ostream<charT,traits>& os, const error_code &ec);
-
-
-// [19.5.3] class error_condition
-
-
-/*! \brief The class \p error_condition describes an object used to hold values identifying
- *  error conditions.
- *
- *  \note \p error_condition values are portable abstractions, while \p error_code values
- *        are implementation specific.
- */
-class error_condition
-{
-  public:
-    // [19.5.3.2] constructors
-
-    /*! Constructs an object of type \p error_condition.
-     *  \post <tt>value() == 0</tt>.
-     *  \post <tt>category() == generic_category()</tt>.
-     */
-    inline error_condition(void);
-
-    /*! Constructs an object of type \p error_condition.
-     *  \post <tt>value() == val</tt>.
-     *  \post <tt>category() == cat</tt>.
-     */
-    inline error_condition(int val, const error_category &cat);
-
-    /*! Constructs an object of type \p error_condition.
-     *  \post <tt>*this == make_error_condition(e)</tt>.
-     *  \note This constructor shall not participate in overload resolution unless
-     *        <tt>is_error_condition_enum<ErrorConditionEnum>::value</tt> is <tt>true</tt>.
-     */
-    template<typename ErrorConditionEnum>
-      error_condition(ErrorConditionEnum e
-// XXX WAR msvc's problem with enable_if
-#if THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC
-        , typename thrust::detail::enable_if<is_error_condition_enum<ErrorConditionEnum>::value>::type * = 0
-#endif // THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC
-                     );
-
-    // [19.5.3.3] modifiers
-
-    /*! Assigns to this \p error_code object from an error value and an \p error_category.
-     *  \param val The new value to return from <tt>value()</tt>.
-     *  \param cat The new \p error_category to return from <tt>category()</tt>.
-     *  \post <tt>value() == val</tt>.
-     *  \post <tt>category() == cat</tt>.
-     */
-    inline void assign(int val, const error_category &cat);
-
-    /*! Assigns to this \p error_code object from an error condition enumeration.
-     *  \return *this
-     *  \post <tt>*this == make_error_condition(e)</tt>.
-     *  \note This operator shall not participate in overload resolution unless
-     *        <tt>is_error_condition_enum<ErrorConditionEnum>::value</tt> is <tt>true</tt>.
-     */
-    template<typename ErrorConditionEnum>
-// XXX WAR msvc's problem with enable_if
-#if THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC
-      typename thrust::detail::enable_if<is_error_condition_enum<ErrorConditionEnum>::value, error_condition>::type &
-#else
-      error_condition &
-#endif // THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC
-        operator=(ErrorConditionEnum e);
-
-    /*! Clears this \p error_code object.
-     *  \post <tt>value == 0</tt>
-     *  \post <tt>category() == generic_category()</tt>.
-     */
-    inline void clear(void);
-
-    // [19.5.3.4] observers
-
-    /*! \return The value encoded by this \p error_condition.
-     */
-    inline int value(void) const;
-
-    /*! \return A <tt>const</tt> reference to the \p error_category encoded by this \p error_condition.
-     */
-    inline const error_category &category(void) const;
-
-    /*! \return <tt>category().message(value())</tt>.
-     */
-    inline std::string message(void) const;
-
-    // XXX replace below with this upon c++0x
-    //explicit operator bool (void) const;
-    
-    /*! \return <tt>value() != 0</tt>.
-     */
-    inline operator bool (void) const;
-
-    /*! \cond
-     */
-
-  private:
-    int m_val;
-    const error_category *m_cat;
-
-    /*! \endcond
-     */
-}; // end error_condition
-
-
-
-// [19.5.3.5] Class error_condition non-member functions
-
-// XXX replace errc::errc_t with errc upon c++0x
-/*! \return <tt>error_condition(static_cast<int>(e), generic_category())</tt>.
- */
-inline error_condition make_error_condition(errc::errc_t e);
-
-
-/*! \return <tt>lhs.category() < rhs.category() || lhs.category() == rhs.category() && lhs.value() < rhs.value()</tt>.
- */
-inline bool operator<(const error_condition &lhs, const error_condition &rhs);
-
-
-// [19.5.4] Comparison operators
-
-
-/*! \return <tt>lhs.category() == rhs.category() && lhs.value() == rhs.value()</tt>.
- */
-inline bool operator==(const error_code &lhs, const error_code &rhs);
-
-
-/*! \return <tt>lhs.category().equivalent(lhs.value(), rhs) || rhs.category().equivalent(lhs,rhs.value())</tt>.
- */
-inline bool operator==(const error_code &lhs, const error_condition &rhs);
-
-
-/*! \return <tt>rhs.category().equivalent(lhs.value(), lhs) || lhs.category().equivalent(rhs, lhs.value())</tt>.
- */
-inline bool operator==(const error_condition &lhs, const error_code &rhs);
-
-
-/*! \return <tt>lhs.category() == rhs.category() && lhs.value() == rhs.value()</tt>
- */
-inline bool operator==(const error_condition &lhs, const error_condition &rhs);
-
-
-/*! \return <tt>!(lhs == rhs)</tt>
- */
-inline bool operator!=(const error_code &lhs, const error_code &rhs);
-
-
-/*! \return <tt>!(lhs == rhs)</tt>
- */
-inline bool operator!=(const error_code &lhs, const error_condition &rhs);
-
-
-/*! \return <tt>!(lhs == rhs)</tt>
- */
-inline bool operator!=(const error_condition &lhs, const error_code &rhs);
-
-
-/*! \return <tt>!(lhs == rhs)</tt>
- */
-inline bool operator!=(const error_condition &lhs, const error_condition &rhs);
-
-/*! \} // end system_diagnostics
- */
-
-
-} // end system
-
-
-// import names into thrust::
-using system::error_category;
-using system::error_code;
-using system::error_condition;
-using system::is_error_code_enum;
-using system::is_error_condition_enum;
-using system::make_error_code;
-using system::make_error_condition;
-
-// XXX replace with using system::errc upon c++0x
-namespace errc = system::errc;
-
-using system::generic_category;
-using system::system_category;
-
-} // end thrust
-
-#include <thrust/system/detail/error_category.inl>
-#include <thrust/system/detail/error_code.inl>
-#include <thrust/system/detail/error_condition.inl>
-
diff --git a/compat/thrust/system/omp/detail/adjacent_difference.h b/compat/thrust/system/omp/detail/adjacent_difference.h
deleted file mode 100644
index 0bbc1884cc..0000000000
--- a/compat/thrust/system/omp/detail/adjacent_difference.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/omp/detail/execution_policy.h>
-#include <thrust/system/detail/generic/adjacent_difference.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace omp
-{
-namespace detail
-{
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename BinaryFunction>
-  OutputIterator adjacent_difference(execution_policy<DerivedPolicy> &exec,
-                                     InputIterator first,
-                                     InputIterator last,
-                                     OutputIterator result,
-                                     BinaryFunction binary_op)
-{
-  // omp prefers generic::adjacent_difference to cpp::adjacent_difference
-  return thrust::system::detail::generic::adjacent_difference(exec, first, last, result, binary_op);
-} // end adjacent_difference()
-
-} // end detail
-} // end omp
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/omp/detail/assign_value.h b/compat/thrust/system/omp/detail/assign_value.h
deleted file mode 100644
index eda3b977b8..0000000000
--- a/compat/thrust/system/omp/detail/assign_value.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system inherits assign_value
-#include <thrust/system/cpp/detail/assign_value.h>
-
diff --git a/compat/thrust/system/omp/detail/binary_search.h b/compat/thrust/system/omp/detail/binary_search.h
deleted file mode 100644
index 254e6fd445..0000000000
--- a/compat/thrust/system/omp/detail/binary_search.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/omp/detail/execution_policy.h>
-#include <thrust/system/detail/generic/binary_search.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace omp
-{
-namespace detail
-{
-
-
-template <typename DerivedPolicy, typename ForwardIterator, typename T, typename StrictWeakOrdering>
-ForwardIterator lower_bound(execution_policy<DerivedPolicy> &exec,
-                            ForwardIterator begin,
-                            ForwardIterator end,
-                            const T& value, 
-                            StrictWeakOrdering comp)
-{
-    // omp prefers generic::lower_bound to cpp::lower_bound
-    return thrust::system::detail::generic::lower_bound(exec, begin, end, value, comp);
-}
-
-
-template <typename DerivedPolicy, typename ForwardIterator, typename T, typename StrictWeakOrdering, typename Backend>
-ForwardIterator upper_bound(execution_policy<DerivedPolicy> &exec,
-                            ForwardIterator begin,
-                            ForwardIterator end,
-                            const T& value, 
-                            StrictWeakOrdering comp)
-{
-    // omp prefers generic::upper_bound to cpp::upper_bound
-    return thrust::system::detail::generic::upper_bound(exec, begin, end, value, comp);
-}
-
-
-template <typename DerivedPolicy, typename ForwardIterator, typename T, typename StrictWeakOrdering>
-bool binary_search(execution_policy<DerivedPolicy> &exec,
-                   ForwardIterator begin,
-                   ForwardIterator end,
-                   const T& value, 
-                   StrictWeakOrdering comp)
-{
-    // omp prefers generic::binary_search to cpp::binary_search
-    return thrust::system::detail::generic::binary_search(exec, begin, end, value, comp);
-}
-
-
-} // end detail
-} // end omp
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/omp/detail/copy.h b/compat/thrust/system/omp/detail/copy.h
deleted file mode 100644
index b23ac18801..0000000000
--- a/compat/thrust/system/omp/detail/copy.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/omp/detail/execution_policy.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace omp
-{
-namespace detail
-{
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator>
-OutputIterator copy(execution_policy<DerivedPolicy> &exec,
-                    InputIterator first,
-                    InputIterator last,
-                    OutputIterator result);
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename Size,
-         typename OutputIterator>
-OutputIterator copy_n(execution_policy<DerivedPolicy> &exec,
-                      InputIterator first,
-                      Size n,
-                      OutputIterator result);
-
-
-} // end namespace detail
-} // end namespace omp
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/omp/detail/copy.inl>
-
diff --git a/compat/thrust/system/omp/detail/copy.inl b/compat/thrust/system/omp/detail/copy.inl
deleted file mode 100644
index 915ff92d70..0000000000
--- a/compat/thrust/system/omp/detail/copy.inl
+++ /dev/null
@@ -1,147 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/omp/detail/copy.h>
-#include <thrust/system/detail/generic/copy.h>
-#include <thrust/detail/type_traits/minimum_type.h>
-#include <thrust/system/cpp/detail/copy.h>
-#include <thrust/iterator/detail/retag.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace omp
-{
-namespace detail
-{
-namespace dispatch
-{
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator>
-  OutputIterator copy(execution_policy<DerivedPolicy> &exec,
-                      InputIterator first,
-                      InputIterator last,
-                      OutputIterator result,
-                      thrust::incrementable_traversal_tag)
-{
-  return thrust::system::cpp::detail::copy(exec, first, last, result);
-} // end copy()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator>
-  OutputIterator copy(execution_policy<DerivedPolicy> &exec,
-                      InputIterator first,
-                      InputIterator last,
-                      OutputIterator result,
-                      thrust::random_access_traversal_tag)
-{
-  // XXX WAR problems reconciling unrelated types such as omp & tbb
-  // reinterpret iterators as the policy we were passed
-  // this ensures that generic::copy's implementation, which eventually results in
-  // zip_iterator works correctly
-  thrust::detail::tagged_iterator<OutputIterator,DerivedPolicy> retagged_result(result);
-
-  return thrust::system::detail::generic::copy(exec, thrust::reinterpret_tag<DerivedPolicy>(first), thrust::reinterpret_tag<DerivedPolicy>(last), retagged_result).base();
-} // end copy()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename Size,
-         typename OutputIterator>
-  OutputIterator copy_n(execution_policy<DerivedPolicy> &exec,
-                        InputIterator first,
-                        Size n,
-                        OutputIterator result,
-                        thrust::incrementable_traversal_tag)
-{
-  return thrust::system::cpp::detail::copy_n(exec, first, n, result);
-} // end copy_n()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename Size,
-         typename OutputIterator>
-  OutputIterator copy_n(execution_policy<DerivedPolicy> &exec,
-                        InputIterator first,
-                        Size n,
-                        OutputIterator result,
-                        thrust::random_access_traversal_tag)
-{
-  // XXX WAR problems reconciling unrelated types such as omp & tbb
-  // reinterpret iterators as the policy we were passed
-  // this ensures that generic::copy's implementation, which eventually results in
-  // zip_iterator works correctly
-  thrust::detail::tagged_iterator<OutputIterator,DerivedPolicy> retagged_result(result);
-
-  return thrust::system::detail::generic::copy_n(exec, thrust::reinterpret_tag<DerivedPolicy>(first), n, retagged_result).base();
-} // end copy_n()
-
-} // end dispatch
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator>
-OutputIterator copy(execution_policy<DerivedPolicy> &exec,
-                    InputIterator first,
-                    InputIterator last,
-                    OutputIterator result)
-{
-  typedef typename thrust::iterator_traversal<InputIterator>::type  traversal1;
-  typedef typename thrust::iterator_traversal<OutputIterator>::type traversal2;
-  
-  typedef typename thrust::detail::minimum_type<traversal1,traversal2>::type traversal;
-
-  // dispatch on minimum traversal
-  return thrust::system::omp::detail::dispatch::copy(exec, first,last,result,traversal());
-} // end copy()
-
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename Size,
-         typename OutputIterator>
-OutputIterator copy_n(execution_policy<DerivedPolicy> &exec,
-                      InputIterator first,
-                      Size n,
-                      OutputIterator result)
-{
-  typedef typename thrust::iterator_traversal<InputIterator>::type  traversal1;
-  typedef typename thrust::iterator_traversal<OutputIterator>::type traversal2;
-  
-  typedef typename thrust::detail::minimum_type<traversal1,traversal2>::type traversal;
-
-  // dispatch on minimum traversal
-  return thrust::system::omp::detail::dispatch::copy_n(exec,first,n,result,traversal());
-} // end copy_n()
-
-
-} // end namespace detail
-} // end namespace omp
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/omp/detail/copy_if.h b/compat/thrust/system/omp/detail/copy_if.h
deleted file mode 100644
index 46754a9f21..0000000000
--- a/compat/thrust/system/omp/detail/copy_if.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/omp/detail/execution_policy.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace omp
-{
-namespace detail
-{
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename Predicate>
-  OutputIterator copy_if(execution_policy<DerivedPolicy> &exec,
-                         InputIterator1 first,
-                         InputIterator1 last,
-                         InputIterator2 stencil,
-                         OutputIterator result,
-                         Predicate pred);
-
-
-} // end detail
-} // end omp
-} // end system
-} // end thrust
-
-#include <thrust/system/omp/detail/copy_if.inl>
-
diff --git a/compat/thrust/system/omp/detail/copy_if.inl b/compat/thrust/system/omp/detail/copy_if.inl
deleted file mode 100644
index 1af6a215a6..0000000000
--- a/compat/thrust/system/omp/detail/copy_if.inl
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/omp/detail/copy_if.h>
-#include <thrust/system/detail/generic/copy_if.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace omp
-{
-namespace detail
-{
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename Predicate>
-  OutputIterator copy_if(execution_policy<DerivedPolicy> &exec,
-                         InputIterator1 first,
-                         InputIterator1 last,
-                         InputIterator2 stencil,
-                         OutputIterator result,
-                         Predicate pred)
-{
-  // omp prefers generic::copy_if to cpp::copy_if
-  return thrust::system::detail::generic::copy_if(exec, first, last, stencil, result, pred);
-} // end copy_if()
-
-
-} // end detail
-} // end omp
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/omp/detail/count.h b/compat/thrust/system/omp/detail/count.h
deleted file mode 100644
index da31ee8700..0000000000
--- a/compat/thrust/system/omp/detail/count.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system inherits count
-#include <thrust/system/cpp/detail/count.h>
-
diff --git a/compat/thrust/system/omp/detail/default_decomposition.h b/compat/thrust/system/omp/detail/default_decomposition.h
deleted file mode 100644
index f1904c29c6..0000000000
--- a/compat/thrust/system/omp/detail/default_decomposition.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file default_decomposition.h
- *  \brief Return a decomposition that is appropriate for the OpenMP backend.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/internal/decompose.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace omp
-{
-namespace detail
-{
-
-template <typename IndexType>
-thrust::system::detail::internal::uniform_decomposition<IndexType> default_decomposition(IndexType n);
-
-} // end namespace detail
-} // end namespace omp
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/omp/detail/default_decomposition.inl>
-
diff --git a/compat/thrust/system/omp/detail/default_decomposition.inl b/compat/thrust/system/omp/detail/default_decomposition.inl
deleted file mode 100644
index 366b4f56c2..0000000000
--- a/compat/thrust/system/omp/detail/default_decomposition.inl
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/system/omp/detail/default_decomposition.h>
-
-// don't attempt to #include this file without omp support
-#if (THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE == THRUST_TRUE)
-#include <omp.h>
-#endif // omp support
-
-namespace thrust
-{
-namespace system
-{
-namespace omp
-{
-namespace detail
-{
-
-template <typename IndexType>
-thrust::system::detail::internal::uniform_decomposition<IndexType> default_decomposition(IndexType n)
-{
-  // we're attempting to launch an omp kernel, assert we're compiling with omp support
-  // ========================================================================
-  // X Note to the user: If you've found this line due to a compiler error, X
-  // X you need to OpenMP support in your compiler.                         X
-  // ========================================================================
-  THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation<IndexType,
-                        (THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE == THRUST_TRUE)>::value) );
-
-#if (THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE == THRUST_TRUE)
-  return thrust::system::detail::internal::uniform_decomposition<IndexType>(n, 1, omp_get_num_procs());
-#else
-  return thrust::system::detail::internal::uniform_decomposition<IndexType>(n, 1, 1);
-#endif
-}
-
-} // end namespace detail
-} // end namespace omp
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/omp/detail/equal.h b/compat/thrust/system/omp/detail/equal.h
deleted file mode 100644
index 74e55183d9..0000000000
--- a/compat/thrust/system/omp/detail/equal.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system inherits equal
-#include <thrust/system/cpp/detail/equal.h>
-
diff --git a/compat/thrust/system/omp/detail/execution_policy.h b/compat/thrust/system/omp/detail/execution_policy.h
deleted file mode 100644
index 1b06224217..0000000000
--- a/compat/thrust/system/omp/detail/execution_policy.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cpp/detail/execution_policy.h>
-#include <thrust/system/tbb/detail/execution_policy.h>
-#include <thrust/iterator/detail/any_system_tag.h>
-#include <thrust/detail/type_traits.h>
-
-namespace thrust
-{
-namespace system
-{
-// put the canonical tag in the same ns as the backend's entry points
-namespace omp
-{
-namespace detail
-{
-
-// this awkward sequence of definitions arise
-// from the desire both for tag to derive
-// from execution_policy and for execution_policy
-// to convert to tag (when execution_policy is not
-// an ancestor of tag)
-
-// forward declaration of tag
-struct tag;
-
-// forward declaration of execution_policy
-template<typename> struct execution_policy;
-
-// specialize execution_policy for tag
-template<>
-  struct execution_policy<tag>
-    : thrust::system::cpp::detail::execution_policy<tag>
-{};
-
-// tag's definition comes before the
-// generic definition of execution_policy
-struct tag : execution_policy<tag> {};
-
-// allow conversion to tag when it is not a successor
-template<typename Derived>
-  struct execution_policy
-    : thrust::system::cpp::detail::execution_policy<Derived>
-{
-  // allow conversion to tag
-  inline operator tag () const
-  {
-    return tag();
-  }
-};
-
-
-// overloads of select_system
-
-// XXX select_system(tbb, omp) & select_system(omp, tbb) are ambiguous
-//     because both convert to cpp without these overloads, which we
-//     arbitrarily define in the omp backend
-
-template<typename System1, typename System2>
-inline __host__ __device__
-  System1 select_system(execution_policy<System1> s, thrust::system::tbb::detail::execution_policy<System2>)
-{
-  return thrust::detail::derived_cast(s);
-} // end select_system()
-
-
-template<typename System1, typename System2>
-inline __host__ __device__
-  System2 select_system(thrust::system::tbb::detail::execution_policy<System1>, execution_policy<System2> s)
-{
-  return thrust::detail::derived_cast(s);
-} // end select_system()
-
-
-} // end detail
-
-// alias execution_policy and tag here
-using thrust::system::omp::detail::execution_policy;
-using thrust::system::omp::detail::tag;
-
-} // end omp
-} // end system
-
-// alias items at top-level
-namespace omp
-{
-
-using thrust::system::omp::execution_policy;
-using thrust::system::omp::tag;
-
-} // end omp
-} // end thrust
-
diff --git a/compat/thrust/system/omp/detail/extrema.h b/compat/thrust/system/omp/detail/extrema.h
deleted file mode 100644
index fb96770b97..0000000000
--- a/compat/thrust/system/omp/detail/extrema.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/omp/detail/execution_policy.h>
-#include <thrust/system/detail/generic/extrema.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace omp
-{
-namespace detail
-{
-
-template <typename DerivedPolicy, typename ForwardIterator, typename BinaryPredicate>
-ForwardIterator max_element(execution_policy<DerivedPolicy> &exec,
-                            ForwardIterator first, 
-                            ForwardIterator last,
-                            BinaryPredicate comp)
-{
-  // omp prefers generic::max_element to cpp::max_element
-  return thrust::system::detail::generic::max_element(exec, first, last, comp);
-} // end max_element()
-
-template <typename DerivedPolicy, typename ForwardIterator, typename BinaryPredicate>
-ForwardIterator min_element(execution_policy<DerivedPolicy> &exec,
-                            ForwardIterator first, 
-                            ForwardIterator last,
-                            BinaryPredicate comp)
-{
-  // omp prefers generic::min_element to cpp::min_element
-  return thrust::system::detail::generic::min_element(exec, first, last, comp);
-} // end min_element()
-
-template <typename DerivedPolicy, typename ForwardIterator, typename BinaryPredicate>
-thrust::pair<ForwardIterator,ForwardIterator> minmax_element(execution_policy<DerivedPolicy> &exec,
-                                                             ForwardIterator first, 
-                                                             ForwardIterator last,
-                                                             BinaryPredicate comp)
-{
-  // omp prefers generic::minmax_element to cpp::minmax_element
-  return thrust::system::detail::generic::minmax_element(exec, first, last, comp);
-} // end minmax_element()
-
-} // end detail
-} // end omp
-} // end system
-} // end thrust
-
-
diff --git a/compat/thrust/system/omp/detail/fill.h b/compat/thrust/system/omp/detail/fill.h
deleted file mode 100644
index 5219e1c7c5..0000000000
--- a/compat/thrust/system/omp/detail/fill.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system inherits fill
-#include <thrust/system/cpp/detail/fill.h>
-
diff --git a/compat/thrust/system/omp/detail/find.h b/compat/thrust/system/omp/detail/find.h
deleted file mode 100644
index a8dca5ad4d..0000000000
--- a/compat/thrust/system/omp/detail/find.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file find.h
- *  \brief OpenMP implementation of find_if. 
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/find.h>
-#include <thrust/system/omp/detail/execution_policy.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace omp
-{
-namespace detail
-{
-
-template <typename DerivedPolicy, typename InputIterator, typename Predicate>
-InputIterator find_if(execution_policy<DerivedPolicy> &exec,
-                      InputIterator first,
-                      InputIterator last,
-                      Predicate pred)
-{
-  // omp prefers generic::find_if to cpp::find_if
-  return thrust::system::detail::generic::find_if(exec, first, last, pred);
-}
-
-} // end namespace detail
-} // end namespace omp
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/omp/detail/for_each.h b/compat/thrust/system/omp/detail/for_each.h
deleted file mode 100644
index 1030623ec0..0000000000
--- a/compat/thrust/system/omp/detail/for_each.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file for_each.h
- *  \brief Defines the interface for a function that executes a 
- *  function or functional for each value in a given range.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/omp/detail/execution_policy.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace omp
-{
-namespace detail
-{
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator,
-         typename UnaryFunction>
-  RandomAccessIterator for_each(execution_policy<DerivedPolicy> &exec,
-                                RandomAccessIterator first,
-                                RandomAccessIterator last,
-                                UnaryFunction f);
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator,
-         typename Size,
-         typename UnaryFunction>
-  RandomAccessIterator for_each_n(execution_policy<DerivedPolicy> &exec,
-                                  RandomAccessIterator first,
-                                  Size n,
-                                  UnaryFunction f);
-
-} // end namespace detail
-} // end namespace omp
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/omp/detail/for_each.inl>
-
diff --git a/compat/thrust/system/omp/detail/for_each.inl b/compat/thrust/system/omp/detail/for_each.inl
deleted file mode 100644
index c6ab8277ba..0000000000
--- a/compat/thrust/system/omp/detail/for_each.inl
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file for_each.inl
- *  \brief Inline file for for_each.h.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/static_assert.h>
-#include <thrust/distance.h>
-#include <thrust/detail/function.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/distance.h>
-#include <thrust/for_each.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace omp
-{
-namespace detail
-{
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator,
-         typename Size,
-         typename UnaryFunction>
-RandomAccessIterator for_each_n(execution_policy<DerivedPolicy> &,
-                                RandomAccessIterator first,
-                                Size n,
-                                UnaryFunction f)
-{
-  // we're attempting to launch an omp kernel, assert we're compiling with omp support
-  // ========================================================================
-  // X Note to the user: If you've found this line due to a compiler error, X
-  // X you need to enable OpenMP support in your compiler.                  X
-  // ========================================================================
-  THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation<RandomAccessIterator,
-                        (THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE == THRUST_TRUE)>::value) );
-
-  if (n <= 0) return first;  //empty range
-
-  // create a wrapped function for f
-  typedef typename thrust::iterator_reference<RandomAccessIterator>::type reference;
-  thrust::detail::host_function<UnaryFunction,void> wrapped_f(f);
-
-// do not attempt to compile the body of this function, which depends on #pragma omp,
-// without support from the compiler
-// XXX implement the body of this function in another file to eliminate this ugliness
-#if (THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE == THRUST_TRUE)
-  // use a signed type for the iteration variable or suffer the consequences of warnings
-  typedef typename thrust::iterator_difference<RandomAccessIterator>::type DifferenceType;
-  DifferenceType signed_n = n;
-#pragma omp parallel for
-  for(DifferenceType i = 0;
-      i < signed_n;
-      ++i)
-  {
-    RandomAccessIterator temp = first + i;
-    wrapped_f(*temp);
-  }
-#endif // THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE
-
-  return first + n;
-} // end for_each_n() 
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator,
-         typename UnaryFunction>
-  RandomAccessIterator for_each(execution_policy<DerivedPolicy> &s,
-                                RandomAccessIterator first,
-                                RandomAccessIterator last,
-                                UnaryFunction f)
-{
-  return omp::detail::for_each_n(s, first, thrust::distance(first,last), f);
-} // end for_each()
-
-} // end namespace detail
-} // end namespace omp
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/omp/detail/gather.h b/compat/thrust/system/omp/detail/gather.h
deleted file mode 100644
index dfb7d7fc2d..0000000000
--- a/compat/thrust/system/omp/detail/gather.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system inherits gather
-#include <thrust/system/cpp/detail/gather.h>
-
diff --git a/compat/thrust/system/omp/detail/generate.h b/compat/thrust/system/omp/detail/generate.h
deleted file mode 100644
index 0cb33b9336..0000000000
--- a/compat/thrust/system/omp/detail/generate.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system inherits generate
-#include <thrust/system/cpp/detail/generate.h>
-
diff --git a/compat/thrust/system/omp/detail/get_value.h b/compat/thrust/system/omp/detail/get_value.h
deleted file mode 100644
index e376e65749..0000000000
--- a/compat/thrust/system/omp/detail/get_value.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system inherits get_value
-#include <thrust/system/cpp/detail/get_value.h>
-
diff --git a/compat/thrust/system/omp/detail/inner_product.h b/compat/thrust/system/omp/detail/inner_product.h
deleted file mode 100644
index 351421a577..0000000000
--- a/compat/thrust/system/omp/detail/inner_product.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system inherits inner_product
-#include <thrust/system/cpp/detail/inner_product.h>
-
diff --git a/compat/thrust/system/omp/detail/iter_swap.h b/compat/thrust/system/omp/detail/iter_swap.h
deleted file mode 100644
index 16176ec69b..0000000000
--- a/compat/thrust/system/omp/detail/iter_swap.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system inherits iter_swap
-#include <thrust/system/cpp/detail/iter_swap.h>
-
diff --git a/compat/thrust/system/omp/detail/logical.h b/compat/thrust/system/omp/detail/logical.h
deleted file mode 100644
index b2a80de70f..0000000000
--- a/compat/thrust/system/omp/detail/logical.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system inherits logical
-#include <thrust/system/cpp/detail/logical.h>
-
diff --git a/compat/thrust/system/omp/detail/malloc_and_free.h b/compat/thrust/system/omp/detail/malloc_and_free.h
deleted file mode 100644
index 811a552a4f..0000000000
--- a/compat/thrust/system/omp/detail/malloc_and_free.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system inherits malloc and free
-#include <thrust/system/cpp/detail/malloc_and_free.h>
-
diff --git a/compat/thrust/system/omp/detail/memory.inl b/compat/thrust/system/omp/detail/memory.inl
deleted file mode 100644
index 7d53de60a1..0000000000
--- a/compat/thrust/system/omp/detail/memory.inl
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cpp/detail/execution_policy.h>
-#include <thrust/system/omp/memory.h>
-#include <thrust/system/cpp/memory.h>
-#include <limits>
-
-namespace thrust
-{
-namespace system
-{
-namespace omp
-{
-
-
-template<typename T>
-  template<typename OtherT>
-    reference<T> &
-      reference<T>
-        ::operator=(const reference<OtherT> &other)
-{
-  return super_t::operator=(other);
-} // end reference::operator=()
-
-template<typename T>
-  reference<T> &
-    reference<T>
-      ::operator=(const value_type &x)
-{
-  return super_t::operator=(x);
-} // end reference::operator=()
-
-template<typename T>
-__host__ __device__
-void swap(reference<T> a, reference<T> b)
-{
-  a.swap(b);
-} // end swap()
-
-namespace detail
-{
-
-// XXX circular #inclusion problems cause the compiler to believe that cpp::malloc
-//     is not defined
-//     WAR the problem by using adl to call cpp::malloc, which requires it to depend
-//     on a template parameter
-template<typename Tag>
-  pointer<void> malloc_workaround(Tag t, std::size_t n)
-{
-  return pointer<void>(malloc(t, n));
-} // end malloc_workaround()
-
-// XXX circular #inclusion problems cause the compiler to believe that cpp::free
-//     is not defined
-//     WAR the problem by using adl to call cpp::free, which requires it to depend
-//     on a template parameter
-template<typename Tag>
-  void free_workaround(Tag t, pointer<void> ptr)
-{
-  free(t, ptr.get());
-} // end free_workaround()
-
-} // end detail
-
-inline pointer<void> malloc(std::size_t n)
-{
-  // XXX this is how we'd like to implement this function,
-  //     if not for circular #inclusion problems:
-  //
-  // return pointer<void>(thrust::system::cpp::malloc(n))
-  //
-  return detail::malloc_workaround(cpp::tag(), n);
-} // end malloc()
-
-template<typename T>
-pointer<T> malloc(std::size_t n)
-{
-  pointer<void> raw_ptr = thrust::system::omp::malloc(sizeof(T) * n);
-  return pointer<T>(reinterpret_cast<T*>(raw_ptr.get()));
-} // end malloc()
-
-inline void free(pointer<void> ptr)
-{
-  // XXX this is how we'd like to implement this function,
-  //     if not for circular #inclusion problems:
-  //
-  // thrust::system::cpp::free(ptr)
-  //
-  detail::free_workaround(cpp::tag(), ptr);
-} // end free()
-
-} // end omp
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/omp/detail/merge.h b/compat/thrust/system/omp/detail/merge.h
deleted file mode 100644
index a7047aa028..0000000000
--- a/compat/thrust/system/omp/detail/merge.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system inherits merge
-#include <thrust/system/cpp/detail/merge.h>
-
diff --git a/compat/thrust/system/omp/detail/mismatch.h b/compat/thrust/system/omp/detail/mismatch.h
deleted file mode 100644
index 03980cfcd5..0000000000
--- a/compat/thrust/system/omp/detail/mismatch.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system inherits mismatch
-#include <thrust/system/cpp/detail/mismatch.h>
-
diff --git a/compat/thrust/system/omp/detail/par.h b/compat/thrust/system/omp/detail/par.h
deleted file mode 100644
index fa6d18e64c..0000000000
--- a/compat/thrust/system/omp/detail/par.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/omp/detail/execution_policy.h>
-#include <thrust/detail/execute_with_allocator.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace omp
-{
-namespace detail
-{
-
-
-struct par_t : thrust::system::omp::detail::execution_policy<par_t>
-{
-  par_t() : thrust::system::omp::detail::execution_policy<par_t>() {}
-
-  template<typename Allocator>
-    thrust::detail::execute_with_allocator<Allocator, thrust::system::omp::detail::execution_policy>
-      operator()(Allocator &alloc) const
-  {
-    return thrust::detail::execute_with_allocator<Allocator, thrust::system::omp::detail::execution_policy>(alloc);
-  }
-};
-
-
-} // end detail
-
-
-static const detail::par_t par;
-
-
-} // end omp
-} // end system
-
-
-// alias par here
-namespace omp
-{
-
-
-using thrust::system::omp::par;
-
-
-} // end omp
-} // end thrust
-
diff --git a/compat/thrust/system/omp/detail/partition.h b/compat/thrust/system/omp/detail/partition.h
deleted file mode 100644
index edcbc30834..0000000000
--- a/compat/thrust/system/omp/detail/partition.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file reduce.h
- *  \brief OpenMP implementation of reduce algorithms.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/omp/detail/execution_policy.h>
-#include <thrust/pair.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace omp
-{
-namespace detail
-{
-
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename Predicate>
-  ForwardIterator stable_partition(execution_policy<DerivedPolicy> &exec,
-                                   ForwardIterator first,
-                                   ForwardIterator last,
-                                   Predicate pred);
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename InputIterator,
-         typename Predicate>
-  ForwardIterator stable_partition(execution_policy<DerivedPolicy> &exec,
-                                   ForwardIterator first,
-                                   ForwardIterator last,
-                                   InputIterator stencil,
-                                   Predicate pred);
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename Predicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    stable_partition_copy(execution_policy<DerivedPolicy> &exec,
-                          InputIterator first,
-                          InputIterator last,
-                          OutputIterator1 out_true,
-                          OutputIterator2 out_false,
-                          Predicate pred);
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename Predicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    stable_partition_copy(execution_policy<DerivedPolicy> &exec,
-                          InputIterator1 first,
-                          InputIterator1 last,
-                          InputIterator2 stencil,
-                          OutputIterator1 out_true,
-                          OutputIterator2 out_false,
-                          Predicate pred);
-
-
-} // end namespace detail
-} // end namespace omp
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/omp/detail/partition.inl>
-
diff --git a/compat/thrust/system/omp/detail/partition.inl b/compat/thrust/system/omp/detail/partition.inl
deleted file mode 100644
index da629e5c69..0000000000
--- a/compat/thrust/system/omp/detail/partition.inl
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file reduce.h
- *  \brief OpenMP implementation of reduce algorithms.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/omp/detail/partition.h>
-#include <thrust/system/detail/generic/partition.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace omp
-{
-namespace detail
-{
-
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename Predicate>
-  ForwardIterator stable_partition(execution_policy<DerivedPolicy> &exec,
-                                   ForwardIterator first,
-                                   ForwardIterator last,
-                                   Predicate pred)
-{
-  // omp prefers generic::stable_partition to cpp::stable_partition
-  return thrust::system::detail::generic::stable_partition(exec, first, last, pred);
-} // end stable_partition()
-
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename InputIterator,
-         typename Predicate>
-  ForwardIterator stable_partition(execution_policy<DerivedPolicy> &exec,
-                                   ForwardIterator first,
-                                   ForwardIterator last,
-                                   InputIterator stencil,
-                                   Predicate pred)
-{
-  // omp prefers generic::stable_partition to cpp::stable_partition
-  return thrust::system::detail::generic::stable_partition(exec, first, last, stencil, pred);
-} // end stable_partition()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename Predicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    stable_partition_copy(execution_policy<DerivedPolicy> &exec,
-                          InputIterator first,
-                          InputIterator last,
-                          OutputIterator1 out_true,
-                          OutputIterator2 out_false,
-                          Predicate pred)
-{
-  // omp prefers generic::stable_partition_copy to cpp::stable_partition_copy
-  return thrust::system::detail::generic::stable_partition_copy(exec, first, last, out_true, out_false, pred);
-} // end stable_partition_copy()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename Predicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    stable_partition_copy(execution_policy<DerivedPolicy> &exec,
-                          InputIterator1 first,
-                          InputIterator1 last,
-                          InputIterator2 stencil,
-                          OutputIterator1 out_true,
-                          OutputIterator2 out_false,
-                          Predicate pred)
-{
-  // omp prefers generic::stable_partition_copy to cpp::stable_partition_copy
-  return thrust::system::detail::generic::stable_partition_copy(exec, first, last, stencil, out_true, out_false, pred);
-} // end stable_partition_copy()
-
-
-} // end namespace detail
-} // end namespace omp
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/omp/detail/reduce.h b/compat/thrust/system/omp/detail/reduce.h
deleted file mode 100644
index 0cc5cebc2f..0000000000
--- a/compat/thrust/system/omp/detail/reduce.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file reduce.h
- *  \brief OpenMP implementation of reduce algorithms.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/omp/detail/execution_policy.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace omp
-{
-namespace detail
-{
-
-
-template<typename DerivedPolicy,
-         typename InputIterator, 
-         typename OutputType,
-         typename BinaryFunction>
-  OutputType reduce(execution_policy<DerivedPolicy> &exec,
-                    InputIterator first,
-                    InputIterator last,
-                    OutputType init,
-                    BinaryFunction binary_op);
-
-
-} // end namespace detail
-} // end namespace omp
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/omp/detail/reduce.inl>
-
diff --git a/compat/thrust/system/omp/detail/reduce.inl b/compat/thrust/system/omp/detail/reduce.inl
deleted file mode 100644
index 1347bfd004..0000000000
--- a/compat/thrust/system/omp/detail/reduce.inl
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/system/omp/detail/reduce.h>
-#include <thrust/system/omp/detail/default_decomposition.h>
-#include <thrust/system/omp/detail/reduce_intervals.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace omp
-{
-namespace detail
-{
-
-
-template<typename DerivedPolicy,
-         typename InputIterator, 
-         typename OutputType,
-         typename BinaryFunction>
-  OutputType reduce(execution_policy<DerivedPolicy> &exec,
-                    InputIterator first,
-                    InputIterator last,
-                    OutputType init,
-                    BinaryFunction binary_op)
-{
-  typedef typename thrust::iterator_difference<InputIterator>::type difference_type;
-
-  const difference_type n = thrust::distance(first,last);
-
-  // determine first and second level decomposition
-  thrust::system::detail::internal::uniform_decomposition<difference_type> decomp1 = thrust::system::omp::detail::default_decomposition(n);
-  thrust::system::detail::internal::uniform_decomposition<difference_type> decomp2(decomp1.size() + 1, 1, 1);
-
-  // allocate storage for the initializer and partial sums
-  // XXX use select_system for Tag
-  thrust::detail::temporary_array<OutputType,DerivedPolicy> partial_sums(exec, decomp1.size() + 1);
-  
-  // set first element of temp array to init
-  partial_sums[0] = init;
-  
-  // accumulate partial sums (first level reduction)
-  thrust::system::omp::detail::reduce_intervals(exec, first, partial_sums.begin() + 1, binary_op, decomp1);
-
-  // reduce partial sums (second level reduction)
-  thrust::system::omp::detail::reduce_intervals(exec, partial_sums.begin(), partial_sums.begin(), binary_op, decomp2);
-
-  return partial_sums[0];
-} // end reduce()
-
-
-} // end detail
-} // end omp
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/omp/detail/reduce_by_key.h b/compat/thrust/system/omp/detail/reduce_by_key.h
deleted file mode 100644
index d7243ee0ca..0000000000
--- a/compat/thrust/system/omp/detail/reduce_by_key.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file reduce.h
- *  \brief OpenMP implementation of reduce algorithms.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/omp/detail/execution_policy.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace omp
-{
-namespace detail
-{
-
-
-template <typename DerivedPolicy,
-          typename InputIterator1,
-          typename InputIterator2,
-          typename OutputIterator1,
-          typename OutputIterator2,
-          typename BinaryPredicate,
-          typename BinaryFunction>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    reduce_by_key(execution_policy<DerivedPolicy> &exec,
-                  InputIterator1 keys_first, 
-                  InputIterator1 keys_last,
-                  InputIterator2 values_first,
-                  OutputIterator1 keys_output,
-                  OutputIterator2 values_output,
-                  BinaryPredicate binary_pred,
-                  BinaryFunction binary_op);
-
-
-} // end namespace detail
-} // end namespace omp
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/omp/detail/reduce_by_key.inl>
-
diff --git a/compat/thrust/system/omp/detail/reduce_by_key.inl b/compat/thrust/system/omp/detail/reduce_by_key.inl
deleted file mode 100644
index 91402d8280..0000000000
--- a/compat/thrust/system/omp/detail/reduce_by_key.inl
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/system/omp/detail/reduce_by_key.h>
-#include <thrust/system/detail/generic/reduce_by_key.h>
-#include <thrust/distance.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace omp
-{
-namespace detail
-{
-
-template <typename DerivedPolicy,
-          typename InputIterator1,
-          typename InputIterator2,
-          typename OutputIterator1,
-          typename OutputIterator2,
-          typename BinaryPredicate,
-          typename BinaryFunction>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    reduce_by_key(execution_policy<DerivedPolicy> &exec,
-                  InputIterator1 keys_first, 
-                  InputIterator1 keys_last,
-                  InputIterator2 values_first,
-                  OutputIterator1 keys_output,
-                  OutputIterator2 values_output,
-                  BinaryPredicate binary_pred,
-                  BinaryFunction binary_op)
-{
-  // omp prefers generic::reduce_by_key to cpp::reduce_by_key
-  return thrust::system::detail::generic::reduce_by_key(exec, keys_first, keys_last, values_first, keys_output, values_output, binary_pred, binary_op);
-} // end reduce_by_key()
-
-
-} // end detail
-} // end omp
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/omp/detail/reduce_intervals.h b/compat/thrust/system/omp/detail/reduce_intervals.h
deleted file mode 100644
index 7bce2074a3..0000000000
--- a/compat/thrust/system/omp/detail/reduce_intervals.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file reduce_intervals.h
- *  \brief OpenMP implementations of reduce_intervals algorithms.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/omp/detail/execution_policy.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace omp
-{
-namespace detail
-{
-
-template <typename DerivedPolicy,
-          typename InputIterator,
-          typename OutputIterator,
-          typename BinaryFunction,
-          typename Decomposition>
-void reduce_intervals(execution_policy<DerivedPolicy> &exec,
-                      InputIterator input,
-                      OutputIterator output,
-                      BinaryFunction binary_op,
-                      Decomposition decomp);
-
-} // end namespace detail
-} // end namespace omp
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/omp/detail/reduce_intervals.inl>
-
diff --git a/compat/thrust/system/omp/detail/reduce_intervals.inl b/compat/thrust/system/omp/detail/reduce_intervals.inl
deleted file mode 100644
index 0752b8aab9..0000000000
--- a/compat/thrust/system/omp/detail/reduce_intervals.inl
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#include <thrust/detail/config.h>
-#include <thrust/system/omp/detail/reduce_intervals.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/detail/function.h>
-#include <thrust/detail/cstdint.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace omp
-{
-namespace detail
-{
-
-template <typename DerivedPolicy,
-          typename InputIterator,
-          typename OutputIterator,
-          typename BinaryFunction,
-          typename Decomposition>
-void reduce_intervals(execution_policy<DerivedPolicy> &,
-                      InputIterator input,
-                      OutputIterator output,
-                      BinaryFunction binary_op,
-                      Decomposition decomp)
-{
-  // we're attempting to launch an omp kernel, assert we're compiling with omp support
-  // ========================================================================
-  // X Note to the user: If you've found this line due to a compiler error, X
-  // X you need to enable OpenMP support in your compiler.                  X
-  // ========================================================================
-  THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation<InputIterator,
-                        (THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE == THRUST_TRUE)>::value) );
-
-#if (THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE == THRUST_TRUE)
-  typedef typename thrust::iterator_value<OutputIterator>::type OutputType;
-
-  // wrap binary_op
-  thrust::detail::host_function<BinaryFunction,OutputType> wrapped_binary_op(binary_op);
-
-  typedef thrust::detail::intptr_t index_type;
-
-  index_type n = static_cast<index_type>(decomp.size());
-
-#if (THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE == THRUST_TRUE)
-# pragma omp parallel for
-#endif // THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE
-  for(index_type i = 0; i < n; i++)
-  {
-    InputIterator begin = input + decomp[i].begin();
-    InputIterator end   = input + decomp[i].end();
-
-    if (begin != end)
-    {
-      OutputType sum = thrust::raw_reference_cast(*begin);
-
-      ++begin;
-
-      while (begin != end)
-      {
-        sum = wrapped_binary_op(sum, *begin);
-        ++begin;
-      }
-
-      OutputIterator tmp = output + i;
-      *tmp = sum;
-    }
-  }
-#endif // THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE
-}
-
-} // end namespace detail
-} // end namespace omp
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/omp/detail/remove.h b/compat/thrust/system/omp/detail/remove.h
deleted file mode 100644
index ebcb49613c..0000000000
--- a/compat/thrust/system/omp/detail/remove.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/omp/detail/execution_policy.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace omp
-{
-namespace detail
-{
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename Predicate>
-  ForwardIterator remove_if(execution_policy<DerivedPolicy> &exec,
-                            ForwardIterator first,
-                            ForwardIterator last,
-                            Predicate pred);
-
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename InputIterator,
-         typename Predicate>
-  ForwardIterator remove_if(execution_policy<DerivedPolicy> &exec,
-                            ForwardIterator first,
-                            ForwardIterator last,
-                            InputIterator stencil,
-                            Predicate pred);
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename Predicate>
-  OutputIterator remove_copy_if(execution_policy<DerivedPolicy> &exec,
-                                InputIterator first,
-                                InputIterator last,
-                                OutputIterator result,
-                                Predicate pred);
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename Predicate>
-  OutputIterator remove_copy_if(execution_policy<DerivedPolicy> &exec,
-                                InputIterator1 first,
-                                InputIterator1 last,
-                                InputIterator2 stencil,
-                                OutputIterator result,
-                                Predicate pred);
-
-
-} // end namespace detail
-} // end namespace omp
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/omp/detail/remove.inl>
-
diff --git a/compat/thrust/system/omp/detail/remove.inl b/compat/thrust/system/omp/detail/remove.inl
deleted file mode 100644
index c056f967e4..0000000000
--- a/compat/thrust/system/omp/detail/remove.inl
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/omp/detail/remove.h>
-#include <thrust/system/detail/generic/remove.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace omp
-{
-namespace detail
-{
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename Predicate>
-  ForwardIterator remove_if(execution_policy<DerivedPolicy> &exec,
-                            ForwardIterator first,
-                            ForwardIterator last,
-                            Predicate pred)
-{
-  // omp prefers generic::remove_if to cpp::remove_if
-  return thrust::system::detail::generic::remove_if(exec, first, last, pred);
-}
-
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename InputIterator,
-         typename Predicate>
-  ForwardIterator remove_if(execution_policy<DerivedPolicy> &exec,
-                            ForwardIterator first,
-                            ForwardIterator last,
-                            InputIterator stencil,
-                            Predicate pred)
-{
-  // omp prefers generic::remove_if to cpp::remove_if
-  return thrust::system::detail::generic::remove_if(exec, first, last, stencil, pred);
-}
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename Predicate>
-  OutputIterator remove_copy_if(execution_policy<DerivedPolicy> &exec,
-                                InputIterator first,
-                                InputIterator last,
-                                OutputIterator result,
-                                Predicate pred)
-{
-  // omp prefers generic::remove_copy_if to cpp::remove_copy_if
-  return thrust::system::detail::generic::remove_copy_if(exec, first, last, result, pred);
-}
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename Predicate>
-  OutputIterator remove_copy_if(execution_policy<DerivedPolicy> &exec,
-                                InputIterator1 first,
-                                InputIterator1 last,
-                                InputIterator2 stencil,
-                                OutputIterator result,
-                                Predicate pred)
-{
-  // omp prefers generic::remove_copy_if to cpp::remove_copy_if
-  return thrust::system::detail::generic::remove_copy_if(exec, first, last, stencil, result, pred);
-}
-
-} // end namespace detail
-} // end namespace omp
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/omp/detail/replace.h b/compat/thrust/system/omp/detail/replace.h
deleted file mode 100644
index c48555d0e1..0000000000
--- a/compat/thrust/system/omp/detail/replace.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system inherits this algorithm
-#include <thrust/system/cpp/detail/scatter.h>
-
diff --git a/compat/thrust/system/omp/detail/reverse.h b/compat/thrust/system/omp/detail/reverse.h
deleted file mode 100644
index 04923d1f6a..0000000000
--- a/compat/thrust/system/omp/detail/reverse.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system inherits reverse
-#include <thrust/system/cpp/detail/reverse.h>
-
diff --git a/compat/thrust/system/omp/detail/scan.h b/compat/thrust/system/omp/detail/scan.h
deleted file mode 100644
index c105951fff..0000000000
--- a/compat/thrust/system/omp/detail/scan.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system inherits scan
-#include <thrust/system/cpp/detail/scan.h>
-
diff --git a/compat/thrust/system/omp/detail/scan_by_key.h b/compat/thrust/system/omp/detail/scan_by_key.h
deleted file mode 100644
index bfbd5d69bd..0000000000
--- a/compat/thrust/system/omp/detail/scan_by_key.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system inherits this algorithm
-#include <thrust/system/cpp/detail/scan_by_key.h>
-
diff --git a/compat/thrust/system/omp/detail/scatter.h b/compat/thrust/system/omp/detail/scatter.h
deleted file mode 100644
index c48555d0e1..0000000000
--- a/compat/thrust/system/omp/detail/scatter.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system inherits this algorithm
-#include <thrust/system/cpp/detail/scatter.h>
-
diff --git a/compat/thrust/system/omp/detail/sequence.h b/compat/thrust/system/omp/detail/sequence.h
deleted file mode 100644
index 811d8f5fbb..0000000000
--- a/compat/thrust/system/omp/detail/sequence.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system inherits sequence
-#include <thrust/system/cpp/detail/sequence.h>
-
diff --git a/compat/thrust/system/omp/detail/set_operations.h b/compat/thrust/system/omp/detail/set_operations.h
deleted file mode 100644
index 687edb2e7d..0000000000
--- a/compat/thrust/system/omp/detail/set_operations.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system inherits set_operations
-#include <thrust/system/cpp/detail/set_operations.h>
-
diff --git a/compat/thrust/system/omp/detail/sort.h b/compat/thrust/system/omp/detail/sort.h
deleted file mode 100644
index 9a480f2799..0000000000
--- a/compat/thrust/system/omp/detail/sort.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/omp/detail/execution_policy.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace omp
-{
-namespace detail
-{
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator,
-         typename StrictWeakOrdering>
-void stable_sort(execution_policy<DerivedPolicy> &exec,
-                 RandomAccessIterator first,
-                 RandomAccessIterator last,
-                 StrictWeakOrdering comp);
-    
-template<typename DerivedPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename StrictWeakOrdering>
-void stable_sort_by_key(execution_policy<DerivedPolicy> &exec,
-                        RandomAccessIterator1 keys_first,
-                        RandomAccessIterator1 keys_last,
-                        RandomAccessIterator2 values_first,
-                        StrictWeakOrdering comp);
-
-} // end namespace detail
-} // end namespace omp
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/omp/detail/sort.inl>
-
diff --git a/compat/thrust/system/omp/detail/sort.inl b/compat/thrust/system/omp/detail/sort.inl
deleted file mode 100644
index ab4f4a1bbe..0000000000
--- a/compat/thrust/system/omp/detail/sort.inl
+++ /dev/null
@@ -1,249 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#include <thrust/detail/config.h>
-
-// don't attempt to #include this file without omp support
-#if (THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE == THRUST_TRUE)
-#include <omp.h>
-#endif // omp support
-
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/system/detail/generic/select_system.h>
-#include <thrust/system/cpp/detail/sort.h>
-#include <thrust/system/cpp/detail/merge.h>
-#include <thrust/system/cpp/detail/execution_policy.h>
-#include <thrust/detail/temporary_array.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace omp
-{
-namespace detail
-{
-namespace sort_detail
-{
-
-
-template <typename DerivedPolicy,
-          typename RandomAccessIterator,
-          typename StrictWeakOrdering>
-void inplace_merge(execution_policy<DerivedPolicy> &exec,
-                   RandomAccessIterator first,
-                   RandomAccessIterator middle,
-                   RandomAccessIterator last,
-                   StrictWeakOrdering comp)
-{
-  typedef typename thrust::iterator_value<RandomAccessIterator>::type value_type;
-
-  thrust::detail::temporary_array<value_type,DerivedPolicy> a(exec, first, middle);
-  thrust::detail::temporary_array<value_type,DerivedPolicy> b(exec, middle, last);
-
-  thrust::system::cpp::detail::merge(exec, a.begin(), a.end(), b.begin(), b.end(), first, comp);
-}
-
-
-template <typename DerivedPolicy,
-          typename RandomAccessIterator1,
-          typename RandomAccessIterator2,
-          typename StrictWeakOrdering>
-void inplace_merge_by_key(execution_policy<DerivedPolicy> &exec,
-                          RandomAccessIterator1 first1,
-                          RandomAccessIterator1 middle1,
-                          RandomAccessIterator1 last1,
-                          RandomAccessIterator2 first2,
-                          StrictWeakOrdering comp)
-{
-  typedef typename thrust::iterator_value<RandomAccessIterator1>::type value_type1;
-  typedef typename thrust::iterator_value<RandomAccessIterator2>::type value_type2;
-
-  RandomAccessIterator2 middle2 = first2 + (middle1 - first1);
-  RandomAccessIterator2 last2   = first2 + (last1   - first1);
-
-  thrust::detail::temporary_array<value_type1,DerivedPolicy> lhs1(exec, first1, middle1);
-  thrust::detail::temporary_array<value_type1,DerivedPolicy> rhs1(exec, middle1, last1);
-  thrust::detail::temporary_array<value_type2,DerivedPolicy> lhs2(exec, first2, middle2);
-  thrust::detail::temporary_array<value_type2,DerivedPolicy> rhs2(exec, middle2, last2);
-
-  thrust::system::cpp::detail::merge_by_key
-    (exec,
-     lhs1.begin(), lhs1.end(), rhs1.begin(), rhs1.end(),
-     lhs2.begin(), rhs2.begin(),
-     first1, first2, comp);
-}
-
-
-} // end sort_detail
-
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator,
-         typename StrictWeakOrdering>
-void stable_sort(execution_policy<DerivedPolicy> &exec,
-                 RandomAccessIterator first,
-                 RandomAccessIterator last,
-                 StrictWeakOrdering comp)
-{
-  // we're attempting to launch an omp kernel, assert we're compiling with omp support
-  // ========================================================================
-  // X Note to the user: If you've found this line due to a compiler error, X
-  // X you need to enable OpenMP support in your compiler.                  X
-  // ========================================================================
-  THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation<RandomAccessIterator,
-                        (THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE == THRUST_TRUE)>::value) );
-
-#if (THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE == THRUST_TRUE)
-  typedef typename thrust::iterator_difference<RandomAccessIterator>::type IndexType;
-  
-  if (first == last)
-    return;
-
-  #pragma omp parallel
-  {
-    thrust::system::detail::internal::uniform_decomposition<IndexType> decomp(last - first, 1, omp_get_num_threads());
-
-    // process id
-    IndexType p_i = omp_get_thread_num();
-
-    // every thread sorts its own tile
-    if (p_i < decomp.size())
-    {
-      thrust::system::cpp::detail::stable_sort(exec,
-                                               first + decomp[p_i].begin(),
-                                               first + decomp[p_i].end(),
-                                               comp);
-    }
-
-    #pragma omp barrier
-
-    IndexType nseg = decomp.size();
-    IndexType h = 2;
-
-    // keep track of which sub-range we're processing
-    IndexType a=p_i, b=p_i, c=p_i+1;
-
-    while( nseg>1 )
-    {
-        if(c >= decomp.size())
-          c = decomp.size() - 1;
-
-        if((p_i % h) == 0 && c > b)
-        {
-          thrust::system::omp::detail::sort_detail::inplace_merge
-            (exec,
-             first + decomp[a].begin(),
-             first + decomp[b].end(),
-             first + decomp[c].end(),
-             comp);
-            b = c;
-            c += h;
-        }
-
-        nseg = (nseg + 1) / 2;
-        h *= 2;
-
-        #pragma omp barrier
-    }
-  }
-#endif // THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE
-}
-
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename StrictWeakOrdering>
-void stable_sort_by_key(execution_policy<DerivedPolicy> &exec,
-                        RandomAccessIterator1 keys_first,
-                        RandomAccessIterator1 keys_last,
-                        RandomAccessIterator2 values_first,
-                        StrictWeakOrdering comp)
-{
-  // we're attempting to launch an omp kernel, assert we're compiling with omp support
-  // ========================================================================
-  // X Note to the user: If you've found this line due to a compiler error, X
-  // X you need to enable OpenMP support in your compiler.                  X
-  // ========================================================================
-  THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation<RandomAccessIterator1,
-                        (THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE == THRUST_TRUE)>::value) );
-
-#if (THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE == THRUST_TRUE)
-  typedef typename thrust::iterator_difference<RandomAccessIterator1>::type IndexType;
-  
-  if (keys_first == keys_last)
-    return;
-
-  #pragma omp parallel
-  {
-    thrust::system::detail::internal::uniform_decomposition<IndexType> decomp(keys_last - keys_first, 1, omp_get_num_threads());
-
-    // process id
-    IndexType p_i = omp_get_thread_num();
-
-    // every thread sorts its own tile
-    if (p_i < decomp.size())
-    {
-      thrust::system::cpp::detail::stable_sort_by_key(exec,
-                                                      keys_first + decomp[p_i].begin(),
-                                                      keys_first + decomp[p_i].end(),
-                                                      values_first + decomp[p_i].begin(),
-                                                      comp);
-    }
-
-    #pragma omp barrier
-
-    IndexType nseg = decomp.size();
-    IndexType h = 2;
-
-    // keep track of which sub-range we're processing
-    IndexType a=p_i, b=p_i, c=p_i+1;
-
-    while( nseg>1 )
-    {
-        if(c >= decomp.size())
-          c = decomp.size() - 1;
-
-        if((p_i % h) == 0 && c > b)
-        {
-          thrust::system::omp::detail::sort_detail::inplace_merge_by_key
-            (exec,
-             keys_first + decomp[a].begin(),
-             keys_first + decomp[b].end(),
-             keys_first + decomp[c].end(),
-             values_first + decomp[a].begin(),
-             comp);
-            b = c;
-            c += h;
-        }
-
-        nseg = (nseg + 1) / 2;
-        h *= 2;
-
-        #pragma omp barrier
-    }
-  }
-#endif // THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE
-}
-
-
-} // end namespace detail
-} // end namespace omp
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/omp/detail/swap_ranges.h b/compat/thrust/system/omp/detail/swap_ranges.h
deleted file mode 100644
index e683aaaa6e..0000000000
--- a/compat/thrust/system/omp/detail/swap_ranges.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// omp inherits swap_ranges
-#include <thrust/system/cpp/detail/swap_ranges.h>
-
diff --git a/compat/thrust/system/omp/detail/tabulate.h b/compat/thrust/system/omp/detail/tabulate.h
deleted file mode 100644
index da65d8e44d..0000000000
--- a/compat/thrust/system/omp/detail/tabulate.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system inherits tabulate
-#include <thrust/system/cpp/detail/tabulate.h>
-
diff --git a/compat/thrust/system/omp/detail/temporary_buffer.h b/compat/thrust/system/omp/detail/temporary_buffer.h
deleted file mode 100644
index 628bd75719..0000000000
--- a/compat/thrust/system/omp/detail/temporary_buffer.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system has no special temporary buffer functions
-
diff --git a/compat/thrust/system/omp/detail/transform.h b/compat/thrust/system/omp/detail/transform.h
deleted file mode 100644
index 70ce1f41b6..0000000000
--- a/compat/thrust/system/omp/detail/transform.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// omp inherits transform
-#include <thrust/system/cpp/detail/transform.h>
-
diff --git a/compat/thrust/system/omp/detail/transform_reduce.h b/compat/thrust/system/omp/detail/transform_reduce.h
deleted file mode 100644
index 23ed07054a..0000000000
--- a/compat/thrust/system/omp/detail/transform_reduce.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system inherits transform_reduce
-#include <thrust/system/cpp/detail/transform_reduce.h>
-
diff --git a/compat/thrust/system/omp/detail/transform_scan.h b/compat/thrust/system/omp/detail/transform_scan.h
deleted file mode 100644
index fc2e55d0c0..0000000000
--- a/compat/thrust/system/omp/detail/transform_scan.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system inherits transform_scan
-#include <thrust/system/cpp/detail/transform_scan.h>
-
diff --git a/compat/thrust/system/omp/detail/uninitialized_copy.h b/compat/thrust/system/omp/detail/uninitialized_copy.h
deleted file mode 100644
index 944f4baf0e..0000000000
--- a/compat/thrust/system/omp/detail/uninitialized_copy.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system inherits uninitialized_copy
-#include <thrust/system/cpp/detail/uninitialized_copy.h>
-
diff --git a/compat/thrust/system/omp/detail/uninitialized_fill.h b/compat/thrust/system/omp/detail/uninitialized_fill.h
deleted file mode 100644
index b9d6de20fa..0000000000
--- a/compat/thrust/system/omp/detail/uninitialized_fill.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system inherits uninitialized_fill
-#include <thrust/system/cpp/detail/uninitialized_fill.h>
-
diff --git a/compat/thrust/system/omp/detail/unique.h b/compat/thrust/system/omp/detail/unique.h
deleted file mode 100644
index 60c617bee9..0000000000
--- a/compat/thrust/system/omp/detail/unique.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/omp/detail/execution_policy.h>
-#include <thrust/pair.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace omp
-{
-namespace detail
-{
-
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename BinaryPredicate>
-  ForwardIterator unique(execution_policy<DerivedPolicy> &exec,
-                         ForwardIterator first,
-                         ForwardIterator last,
-                         BinaryPredicate binary_pred);
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename BinaryPredicate>
-  OutputIterator unique_copy(execution_policy<DerivedPolicy> &exec,
-                             InputIterator first,
-                             InputIterator last,
-                             OutputIterator output,
-                             BinaryPredicate binary_pred);
-
-
-} // end namespace detail
-} // end namespace omp 
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/omp/detail/unique.inl>
-
diff --git a/compat/thrust/system/omp/detail/unique.inl b/compat/thrust/system/omp/detail/unique.inl
deleted file mode 100644
index d66ac3bf4f..0000000000
--- a/compat/thrust/system/omp/detail/unique.inl
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/omp/detail/unique.h>
-#include <thrust/system/detail/generic/unique.h>
-#include <thrust/pair.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace omp
-{
-namespace detail
-{
-
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename BinaryPredicate>
-  ForwardIterator unique(execution_policy<DerivedPolicy> &exec,
-                         ForwardIterator first,
-                         ForwardIterator last,
-                         BinaryPredicate binary_pred)
-{
-  // omp prefers generic::unique to cpp::unique
-  return thrust::system::detail::generic::unique(exec,first,last,binary_pred);
-} // end unique()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename BinaryPredicate>
-  OutputIterator unique_copy(execution_policy<DerivedPolicy> &exec,
-                             InputIterator first,
-                             InputIterator last,
-                             OutputIterator output,
-                             BinaryPredicate binary_pred)
-{
-  // omp prefers generic::unique_copy to cpp::unique_copy
-  return thrust::system::detail::generic::unique_copy(exec,first,last,output,binary_pred);
-} // end unique_copy()
-
-
-} // end namespace detail
-} // end namespace omp 
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/omp/detail/unique_by_key.h b/compat/thrust/system/omp/detail/unique_by_key.h
deleted file mode 100644
index 8fdde66edf..0000000000
--- a/compat/thrust/system/omp/detail/unique_by_key.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/omp/detail/execution_policy.h>
-#include <thrust/pair.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace omp
-{
-namespace detail
-{
-
-
-template<typename DerivedPolicy,
-         typename ForwardIterator1,
-         typename ForwardIterator2,
-         typename BinaryPredicate>
-  thrust::pair<ForwardIterator1,ForwardIterator2>
-    unique_by_key(execution_policy<DerivedPolicy> &exec,
-                  ForwardIterator1 keys_first, 
-                  ForwardIterator1 keys_last,
-                  ForwardIterator2 values_first,
-                  BinaryPredicate binary_pred);
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename BinaryPredicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    unique_by_key_copy(execution_policy<DerivedPolicy> &exec,
-                       InputIterator1 keys_first, 
-                       InputIterator1 keys_last,
-                       InputIterator2 values_first,
-                       OutputIterator1 keys_output,
-                       OutputIterator2 values_output,
-                       BinaryPredicate binary_pred);
-
-
-} // end namespace detail
-} // end namespace omp 
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/omp/detail/unique_by_key.inl>
-
diff --git a/compat/thrust/system/omp/detail/unique_by_key.inl b/compat/thrust/system/omp/detail/unique_by_key.inl
deleted file mode 100644
index 644b5ed6aa..0000000000
--- a/compat/thrust/system/omp/detail/unique_by_key.inl
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/omp/detail/unique_by_key.h>
-#include <thrust/system/detail/generic/unique_by_key.h>
-#include <thrust/pair.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace omp
-{
-namespace detail
-{
-
-
-template<typename DerivedPolicy,
-         typename ForwardIterator1,
-         typename ForwardIterator2,
-         typename BinaryPredicate>
-  thrust::pair<ForwardIterator1,ForwardIterator2>
-    unique_by_key(execution_policy<DerivedPolicy> &exec,
-                  ForwardIterator1 keys_first, 
-                  ForwardIterator1 keys_last,
-                  ForwardIterator2 values_first,
-                  BinaryPredicate binary_pred)
-{
-  // omp prefers generic::unique_by_key to cpp::unique_by_key
-  return thrust::system::detail::generic::unique_by_key(exec,keys_first,keys_last,values_first,binary_pred);
-} // end unique_by_key()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename BinaryPredicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    unique_by_key_copy(execution_policy<DerivedPolicy> &exec,
-                       InputIterator1 keys_first, 
-                       InputIterator1 keys_last,
-                       InputIterator2 values_first,
-                       OutputIterator1 keys_output,
-                       OutputIterator2 values_output,
-                       BinaryPredicate binary_pred)
-{
-  // omp prefers generic::unique_by_key_copy to cpp::unique_by_key_copy
-  return thrust::system::detail::generic::unique_by_key_copy(exec,keys_first,keys_last,values_first,keys_output,values_output,binary_pred);
-} // end unique_by_key_copy()
-
-
-} // end namespace detail
-} // end namespace omp
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/omp/detail/vector.inl b/compat/thrust/system/omp/detail/vector.inl
deleted file mode 100644
index 32c845c4ad..0000000000
--- a/compat/thrust/system/omp/detail/vector.inl
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/omp/vector.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace omp
-{
-
-template<typename T, typename Allocator>
-  vector<T,Allocator>
-    ::vector()
-      : super_t()
-{}
-
-template<typename T, typename Allocator>
-  vector<T,Allocator>
-    ::vector(size_type n)
-      : super_t(n)
-{}
-
-template<typename T, typename Allocator>
-  vector<T,Allocator>
-    ::vector(size_type n, const value_type &value)
-      : super_t(n,value)
-{}
-
-template<typename T, typename Allocator>
-  vector<T,Allocator>
-    ::vector(const vector &x)
-      : super_t(x)
-{}
-
-template<typename T, typename Allocator>
-  template<typename OtherT, typename OtherAllocator>
-    vector<T,Allocator>
-      ::vector(const thrust::detail::vector_base<OtherT,OtherAllocator> &x)
-        : super_t(x)
-{}
-
-template<typename T, typename Allocator>
-  template<typename OtherT, typename OtherAllocator>
-    vector<T,Allocator>
-      ::vector(const std::vector<OtherT,OtherAllocator> &x)
-        : super_t(x)
-{}
-
-template<typename T, typename Allocator>
-  template<typename InputIterator>
-    vector<T,Allocator>
-      ::vector(InputIterator first, InputIterator last)
-        : super_t(first,last)
-{}
-
-template<typename T, typename Allocator>
-  template<typename OtherT, typename OtherAllocator>
-    vector<T,Allocator> &
-      vector<T,Allocator>
-        ::operator=(const std::vector<OtherT,OtherAllocator> &x)
-{
-  super_t::operator=(x);
-  return *this;
-}
-
-template<typename T, typename Allocator>
-  template<typename OtherT, typename OtherAllocator>
-    vector<T,Allocator> &
-      vector<T,Allocator>
-        ::operator=(const thrust::detail::vector_base<OtherT,OtherAllocator> &x)
-{
-  super_t::operator=(x);
-  return *this;
-}
-      
-} // end omp
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/omp/execution_policy.h b/compat/thrust/system/omp/execution_policy.h
deleted file mode 100644
index 7d5d1d80d3..0000000000
--- a/compat/thrust/system/omp/execution_policy.h
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-/*! \file thrust/system/omp/execution_policy.h
- *  \brief Execution policies for Thrust's OpenMP system.
- */
-
-#include <thrust/detail/config.h>
-
-// get the execution policies definitions first
-#include <thrust/system/omp/detail/execution_policy.h>
-
-// get the definition of par
-#include <thrust/system/omp/detail/par.h>
-
-// now get all the algorithm definitions
-
-#include <thrust/system/omp/detail/adjacent_difference.h>
-#include <thrust/system/omp/detail/assign_value.h>
-#include <thrust/system/omp/detail/binary_search.h>
-#include <thrust/system/omp/detail/copy.h>
-#include <thrust/system/omp/detail/copy_if.h>
-#include <thrust/system/omp/detail/count.h>
-#include <thrust/system/omp/detail/equal.h>
-#include <thrust/system/omp/detail/extrema.h>
-#include <thrust/system/omp/detail/fill.h>
-#include <thrust/system/omp/detail/find.h>
-#include <thrust/system/omp/detail/for_each.h>
-#include <thrust/system/omp/detail/gather.h>
-#include <thrust/system/omp/detail/generate.h>
-#include <thrust/system/omp/detail/get_value.h>
-#include <thrust/system/omp/detail/inner_product.h>
-#include <thrust/system/omp/detail/iter_swap.h>
-#include <thrust/system/omp/detail/logical.h>
-#include <thrust/system/omp/detail/malloc_and_free.h>
-#include <thrust/system/omp/detail/merge.h>
-#include <thrust/system/omp/detail/mismatch.h>
-#include <thrust/system/omp/detail/partition.h>
-#include <thrust/system/omp/detail/reduce.h>
-#include <thrust/system/omp/detail/reduce_by_key.h>
-#include <thrust/system/omp/detail/remove.h>
-#include <thrust/system/omp/detail/replace.h>
-#include <thrust/system/omp/detail/reverse.h>
-#include <thrust/system/omp/detail/scan.h>
-#include <thrust/system/omp/detail/scan_by_key.h>
-#include <thrust/system/omp/detail/scatter.h>
-#include <thrust/system/omp/detail/sequence.h>
-#include <thrust/system/omp/detail/set_operations.h>
-#include <thrust/system/omp/detail/sort.h>
-#include <thrust/system/omp/detail/swap_ranges.h>
-#include <thrust/system/omp/detail/tabulate.h>
-#include <thrust/system/omp/detail/transform.h>
-#include <thrust/system/omp/detail/transform_reduce.h>
-#include <thrust/system/omp/detail/transform_scan.h>
-#include <thrust/system/omp/detail/uninitialized_copy.h>
-#include <thrust/system/omp/detail/uninitialized_fill.h>
-#include <thrust/system/omp/detail/unique.h>
-#include <thrust/system/omp/detail/unique_by_key.h>
-
-
-// define these entities here for the purpose of Doxygenating them
-// they are actually defined elsewhere
-#if 0
-namespace thrust
-{
-namespace system
-{
-namespace omp
-{
-
-
-/*! \addtogroup execution_policies
- *  \{
- */
-
-
-/*! \p thrust::omp::execution_policy is the base class for all Thrust parallel execution
- *  policies which are derived from Thrust's OpenMP backend system.
- */
-template<typename DerivedPolicy>
-struct execution_policy : thrust::execution_policy<DerivedPolicy>
-{};
-
-
-/*! \p omp::tag is a type representing Thrust's standard C++ backend system in C++'s type system.
- *  Iterators "tagged" with a type which is convertible to \p omp::tag assert that they may be
- *  "dispatched" to algorithm implementations in the \p omp system.
- */
-struct tag : thrust::system::omp::execution_policy<tag> { unspecified };
-
-
-/*! \p thrust::omp::par is the parallel execution policy associated with Thrust's OpenMP
- *  backend system.
- *
- *  Instead of relying on implicit algorithm dispatch through iterator system tags, users may
- *  directly target Thrust's OpenMP backend system by providing \p thrust::omp::par as an algorithm
- *  parameter.
- *
- *  Explicit dispatch can be useful in avoiding the introduction of data copies into containers such
- *  as \p thrust::omp::vector.
- *
- *  The type of \p thrust::omp::par is implementation-defined.
- *
- *  The following code snippet demonstrates how to use \p thrust::omp::par to explicitly dispatch an
- *  invocation of \p thrust::for_each to the OpenMP backend system:
- *
- *  \code
- *  #include <thrust/for_each.h>
- *  #include <thrust/system/omp/execution_policy.h>
- *  #include <cstdio>
- *
- *  struct printf_functor
- *  {
- *    __host__ __device__
- *    void operator()(int x)
- *    {
- *      printf("%d\n");
- *    }
- *  };
- *  ...
- *  int vec[3];
- *  vec[0] = 0; vec[1] = 1; vec[2] = 2;
- *
- *  thrust::for_each(thrust::omp::par, vec.begin(), vec.end(), printf_functor());
- *
- *  // 0 1 2 is printed to standard output in some unspecified order
- *  \endcode
- */
-static const unspecified par;
-
-
-/*! \}
- */
-
-
-} // end cpp
-} // end system
-} // end thrust
-#endif
-
-
diff --git a/compat/thrust/system/omp/memory.h b/compat/thrust/system/omp/memory.h
deleted file mode 100644
index 0a23434833..0000000000
--- a/compat/thrust/system/omp/memory.h
+++ /dev/null
@@ -1,414 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-/*! \file thrust/system/omp/memory.h
- *  \brief Managing memory associated with Thrust's OpenMP system.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/omp/detail/execution_policy.h>
-#include <thrust/memory.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/detail/allocator/malloc_allocator.h>
-#include <ostream>
-
-namespace thrust
-{
-namespace system
-{
-namespace omp
-{
-
-template<typename> class pointer;
-
-} // end omp
-} // end system
-} // end thrust
-
-
-/*! \cond
- */
-
-// specialize std::iterator_traits to avoid problems with the name of
-// pointer's constructor shadowing its nested pointer type
-// do this before pointer is defined so the specialization is correctly
-// used inside the definition
-namespace std
-{
-
-template<typename Element>
-  struct iterator_traits<thrust::system::omp::pointer<Element> >
-{
-  private:
-    typedef thrust::system::omp::pointer<Element> ptr;
-
-  public:
-    typedef typename ptr::iterator_category       iterator_category;
-    typedef typename ptr::value_type              value_type;
-    typedef typename ptr::difference_type         difference_type;
-    typedef ptr                                   pointer;
-    typedef typename ptr::reference               reference;
-}; // end iterator_traits
-
-} // end std
-
-/*! \endcond
- */
-
-
-namespace thrust
-{
-namespace system
-{
-
-/*! \addtogroup system_backends Systems
- *  \ingroup system
- *  \{
- */
-
-/*! \namespace thrust::system::omp
- *  \brief \p thrust::system::omp is the namespace containing functionality for allocating, manipulating,
- *         and deallocating memory available to Thrust's OpenMP backend system.
- *         The identifiers are provided in a separate namespace underneath <tt>thrust::system</tt>
- *         for import convenience but are also aliased in the top-level <tt>thrust::omp</tt>
- *         namespace for easy access.
- *
- */
-namespace omp
-{
-
-// forward declaration of reference for pointer
-template<typename Element> class reference;
-
-/*! \cond
- */
-
-// XXX nvcc + msvc have trouble instantiating reference below
-//     this is a workaround
-namespace detail
-{
-
-template<typename Element>
-  struct reference_msvc_workaround
-{
-  typedef thrust::system::omp::reference<Element> type;
-}; // end reference_msvc_workaround
-
-} // end detail
-
-/*! \endcond
- */
-
-
-/*! \p pointer stores a pointer to an object allocated in memory available to the omp system.
- *  This type provides type safety when dispatching standard algorithms on ranges resident
- *  in omp memory.
- *
- *  \p pointer has pointer semantics: it may be dereferenced and manipulated with pointer arithmetic.
- *
- *  \p pointer can be created with the function \p omp::malloc, or by explicitly calling its constructor
- *  with a raw pointer.
- *
- *  The raw pointer encapsulated by a \p pointer may be obtained by eiter its <tt>get</tt> member function
- *  or the \p raw_pointer_cast function.
- *
- *  \note \p pointer is not a "smart" pointer; it is the programmer's responsibility to deallocate memory
- *  pointed to by \p pointer.
- *
- *  \tparam T specifies the type of the pointee.
- *
- *  \see omp::malloc
- *  \see omp::free
- *  \see raw_pointer_cast
- */
-template<typename T>
-  class pointer
-    : public thrust::pointer<
-               T,
-               thrust::system::omp::tag,
-               thrust::system::omp::reference<T>,
-               thrust::system::omp::pointer<T>
-             >
-{
-  /*! \cond
-   */
-
-  private:
-    typedef thrust::pointer<
-      T,
-      thrust::system::omp::tag,
-      //thrust::system::omp::reference<T>,
-      typename detail::reference_msvc_workaround<T>::type,
-      thrust::system::omp::pointer<T>
-    > super_t;
-
-  /*! \endcond
-   */
-
-  public:
-    // note that omp::pointer's member functions need __host__ __device__
-    // to interoperate with nvcc + iterators' dereference member function
-
-    /*! \p pointer's no-argument constructor initializes its encapsulated pointer to \c 0.
-     */
-    __host__ __device__
-    pointer() : super_t() {}
-
-    /*! This constructor allows construction of a <tt>pointer<const T></tt> from a <tt>T*</tt>.
-     *
-     *  \param ptr A raw pointer to copy from, presumed to point to a location in memory
-     *         accessible by the \p omp system.
-     *  \tparam OtherT \p OtherT shall be convertible to \p T.
-     */
-    template<typename OtherT>
-    __host__ __device__
-    explicit pointer(OtherT *ptr) : super_t(ptr) {}
-
-    /*! This constructor allows construction from another pointer-like object with related type.
-     *
-     *  \param other The \p OtherPointer to copy.
-     *  \tparam OtherPointer The system tag associated with \p OtherPointer shall be convertible
-     *          to \p thrust::system::omp::tag and its element type shall be convertible to \p T.
-     */
-    template<typename OtherPointer>
-    __host__ __device__
-    pointer(const OtherPointer &other,
-            typename thrust::detail::enable_if_pointer_is_convertible<
-              OtherPointer,
-              pointer
-            >::type * = 0) : super_t(other) {}
-
-    /*! Assignment operator allows assigning from another pointer-like object with related type.
-     *
-     *  \param other The other pointer-like object to assign from.
-     *  \tparam OtherPointer The system tag associated with \p OtherPointer shall be convertible
-     *          to \p thrust::system::omp::tag and its element type shall be convertible to \p T.
-     */
-    template<typename OtherPointer>
-    __host__ __device__
-    typename thrust::detail::enable_if_pointer_is_convertible<
-      OtherPointer,
-      pointer,
-      pointer &
-    >::type
-    operator=(const OtherPointer &other)
-    {
-      return super_t::operator=(other);
-    }
-}; // end pointer
-
-
-/*! \p reference is a wrapped reference to an object stored in memory available to the \p omp system.
- *  \p reference is the type of the result of dereferencing a \p omp::pointer.
- *
- *  \tparam T Specifies the type of the referenced object.
- */
-template<typename T>
-  class reference
-    : public thrust::reference<
-               T,
-               thrust::system::omp::pointer<T>,
-               thrust::system::omp::reference<T>
-             >
-{
-  /*! \cond
-   */
-
-  private:
-    typedef thrust::reference<
-      T,
-      thrust::system::omp::pointer<T>,
-      thrust::system::omp::reference<T>
-    > super_t;
-
-  /*! \endcond
-   */
-
-  public:
-    /*! \cond
-     */
-
-    typedef typename super_t::value_type value_type;
-    typedef typename super_t::pointer    pointer;
-
-    /*! \endcond
-     */
-
-    /*! This constructor initializes this \p reference to refer to an object
-     *  pointed to by the given \p pointer. After this \p reference is constructed,
-     *  it shall refer to the object pointed to by \p ptr.
-     *
-     *  \param ptr A \p pointer to copy from.
-     */
-    __host__ __device__
-    explicit reference(const pointer &ptr)
-      : super_t(ptr)
-    {}
-
-    /*! This constructor accepts a const reference to another \p reference of related type.
-     *  After this \p reference is constructed, it shall refer to the same object as \p other.
-     *
-     *  \param other A \p reference to copy from.
-     *  \tparam OtherT The element type of the other \p reference.
-     *
-     *  \note This constructor is templated primarily to allow initialization of <tt>reference<const T></tt>
-     *        from <tt>reference<T></tt>.
-     */
-    template<typename OtherT>
-    __host__ __device__
-    reference(const reference<OtherT> &other,
-              typename thrust::detail::enable_if_convertible<
-                typename reference<OtherT>::pointer,
-                pointer
-              >::type * = 0)
-      : super_t(other)
-    {}
-
-    /*! Copy assignment operator copy assigns from another \p reference of related type.
-     *
-     *  \param other The other \p reference to assign from.
-     *  \return <tt>*this</tt>
-     *  \tparam OtherT The element type of the other \p reference.
-     */
-    template<typename OtherT>
-    reference &operator=(const reference<OtherT> &other);
-
-    /*! Assignment operator assigns from a \p value_type.
-     *
-     *  \param x The \p value_type to assign from.
-     *  \return <tt>*this</tt>
-     */
-    reference &operator=(const value_type &x);
-}; // end reference
-
-/*! Exchanges the values of two objects referred to by \p reference.
- *  \p x The first \p reference of interest.
- *  \p y The second \p reference ot interest.
- */
-template<typename T>
-__host__ __device__
-void swap(reference<T> x, reference<T> y);
-
-/*! Allocates an area of memory available to Thrust's <tt>omp</tt> system.
- *  \param n Number of bytes to allocate.
- *  \return A <tt>omp::pointer<void></tt> pointing to the beginning of the newly
- *          allocated memory. A null <tt>omp::pointer<void></tt> is returned if
- *          an error occurs.
- *  \note The <tt>omp::pointer<void></tt> returned by this function must be
- *        deallocated with \p omp::free.
- *  \see omp::free
- *  \see std::malloc
- */
-inline pointer<void> malloc(std::size_t n);
-
-/*! Allocates a typed area of memory available to Thrust's <tt>omp</tt> system.
- *  \param n Number of elements to allocate.
- *  \return A <tt>omp::pointer<T></tt> pointing to the beginning of the newly
- *          allocated memory. A null <tt>omp::pointer<T></tt> is returned if
- *          an error occurs.
- *  \note The <tt>omp::pointer<T></tt> returned by this function must be
- *        deallocated with \p omp::free.
- *  \see omp::free
- *  \see std::malloc
- */
-template<typename T>
-inline pointer<T> malloc(std::size_t n);
-
-/*! Deallocates an area of memory previously allocated by <tt>omp::malloc</tt>.
- *  \param ptr A <tt>omp::pointer<void></tt> pointing to the beginning of an area
- *         of memory previously allocated with <tt>omp::malloc</tt>.
- *  \see omp::malloc
- *  \see std::free
- */
-inline void free(pointer<void> ptr);
-
-// XXX upon c++11
-// template<typename T> using allocator = thrust::detail::malloc_allocator<T,tag,pointer<T> >;
-
-/*! \p omp::allocator is the default allocator used by the \p omp system's containers such as
- *  <tt>omp::vector</tt> if no user-specified allocator is provided. \p omp::allocator allocates
- *  (deallocates) storage with \p omp::malloc (\p omp::free).
- */
-template<typename T>
-  struct allocator
-    : thrust::detail::malloc_allocator<
-        T,
-        tag,
-        pointer<T>
-      >
-{
-  /*! The \p rebind metafunction provides the type of an \p allocator
-   *  instantiated with another type.
-   *
-   *  \tparam U The other type to use for instantiation.
-   */
-  template<typename U>
-    struct rebind
-  {
-    /*! The typedef \p other gives the type of the rebound \p allocator.
-     */
-    typedef allocator<U> other;
-  };
-
-  /*! No-argument constructor has no effect.
-   */
-  __host__ __device__
-  inline allocator() {}
-
-  /*! Copy constructor has no effect.
-   */
-  __host__ __device__
-  inline allocator(const allocator &) {}
-
-  /*! Constructor from other \p allocator has no effect.
-   */
-  template<typename U>
-  __host__ __device__
-  inline allocator(const allocator<U> &) {}
-
-  /*! Destructor has no effect.
-   */
-  __host__ __device__
-  inline ~allocator() {}
-}; // end allocator
-
-} // end omp
-
-/*! \}
- */
-
-} // end system
-
-/*! \namespace thrust::omp
- *  \brief \p thrust::omp is a top-level alias for thrust::system::omp.
- */
-namespace omp
-{
-
-using thrust::system::omp::pointer;
-using thrust::system::omp::reference;
-using thrust::system::omp::malloc;
-using thrust::system::omp::free;
-using thrust::system::omp::allocator;
-
-} // end omp
-
-} // end thrust
-
-#include <thrust/system/omp/detail/memory.inl>
-
diff --git a/compat/thrust/system/omp/vector.h b/compat/thrust/system/omp/vector.h
deleted file mode 100644
index 5f45a9169b..0000000000
--- a/compat/thrust/system/omp/vector.h
+++ /dev/null
@@ -1,149 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-/*! \file thrust/system/omp/vector.h
- *  \brief A dynamically-sizable array of elements which reside in memory available to
- *         Thrust's OpenMP system.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/omp/memory.h>
-#include <thrust/detail/vector_base.h>
-#include <vector>
-
-namespace thrust
-{
-
-// forward declaration of host_vector
-// XXX why is this here? it doesn't seem necessary for anything below
-template<typename T, typename Allocator> class host_vector;
-
-namespace system
-{
-namespace omp
-{
-
-// XXX upon c++11
-// template<typename T, typename Allocator = allocator<T> > using vector = thrust::detail::vector_base<T,Allocator>;
-
-/*! \p omp::vector is a container that supports random access to elements,
- *  constant time removal of elements at the end, and linear time insertion
- *  and removal of elements at the beginning or in the middle. The number of
- *  elements in a \p omp::vector may vary dynamically; memory management is
- *  automatic. The elements contained in an \p omp::vector reside in memory
- *  available to the \p omp system.
- *
- *  \tparam T The element type of the \p omp::vector.
- *  \tparam Allocator The allocator type of the \p omp::vector. Defaults to \p omp::allocator.
- *
- *  \see http://www.sgi.com/tech/stl/Vector.html
- *  \see host_vector For the documentation of the complete interface which is
- *                   shared by \p omp::vector
- *  \see device_vector
- */
-template<typename T, typename Allocator = allocator<T> >
-  class vector
-    : public thrust::detail::vector_base<T,Allocator>
-{
-  /*! \cond
-   */
-  private:
-    typedef thrust::detail::vector_base<T,Allocator> super_t;
-  /*! \endcond
-   */
-
-  public:
-    
-  /*! \cond
-   */
-    typedef typename super_t::size_type  size_type;
-    typedef typename super_t::value_type value_type;
-  /*! \endcond
-   */
-
-    /*! This constructor creates an empty \p omp::vector.
-     */
-    vector();
-
-    /*! This constructor creates a \p omp::vector with \p n default-constructed elements.
-     *  \param n The size of the \p omp::vector to create.
-     */
-    explicit vector(size_type n);
-
-    /*! This constructor creates a \p omp::vector with \p n copies of \p value.
-     *  \param n The size of the \p omp::vector to create.
-     *  \param value An element to copy.
-     */
-    explicit vector(size_type n, const value_type &value);
-
-    /*! Copy constructor copies from another \p omp::vector.
-     *  \param x The other \p omp::vector to copy.
-     */
-    vector(const vector &x);
-
-    /*! This constructor copies from another Thrust vector-like object.
-     *  \param x The other object to copy from.
-     */
-    template<typename OtherT, typename OtherAllocator>
-    vector(const thrust::detail::vector_base<OtherT,OtherAllocator> &x);
-
-    /*! This constructor copies from a \c std::vector.
-     *  \param x The \c std::vector to copy from.
-     */
-    template<typename OtherT, typename OtherAllocator>
-    vector(const std::vector<OtherT,OtherAllocator> &x);
-
-    /*! This constructor creates an \p omp::vector by copying from a range.
-     *  \param first The beginning of the range.
-     *  \param last The end of the range.
-     */
-    template<typename InputIterator>
-    vector(InputIterator first, InputIterator last);
-
-    // XXX vector_base should take a Derived type so we don't have to define these superfluous assigns
-
-    /*! Assignment operator assigns from a \c std::vector.
-     *  \param x The \c std::vector to assign from.
-     *  \return <tt>*this</tt>
-     */
-    template<typename OtherT, typename OtherAllocator>
-    vector &operator=(const std::vector<OtherT,OtherAllocator> &x);
-
-    /*! Assignment operator assigns from another Thrust vector-like object.
-     *  \param x The other object to assign from.
-     *  \return <tt>*this</tt>
-     */
-    template<typename OtherT, typename OtherAllocator>
-    vector &operator=(const thrust::detail::vector_base<OtherT,OtherAllocator> &x);
-}; // end vector
-
-} // end omp
-} // end system
-
-// alias system::omp names at top-level
-namespace omp
-{
-
-using thrust::system::omp::vector;
-
-} // end omp
-
-} // end thrust
-
-#include <thrust/system/omp/detail/vector.inl>
-
diff --git a/compat/thrust/system/system_error.h b/compat/thrust/system/system_error.h
deleted file mode 100644
index 6f94b61e0a..0000000000
--- a/compat/thrust/system/system_error.h
+++ /dev/null
@@ -1,179 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file system/system_error.h
- *  \brief An exception object used to report error conditions that have an
- *         associated error code
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <stdexcept>
-#include <string>
-
-#include <thrust/system/error_code.h>
-
-namespace thrust
-{
-
-namespace system
-{
-
-// [19.5.5] Class system_error
-
-// [19.5.5.1] Class system_error overview
-
-/*! \addtogroup system_diagnostics System Diagnostics
- *  \ingroup system
- *  \{
- */
-
-/*! \brief The class \p system_error describes an exception object used to report error
- *  conditions that have an associated \p error_code. Such error conditions typically
- *  originate from the operating system or other low-level application program interfaces.
- *
- *  Thrust uses \p system_error to report the error codes returned from device backends
- *  such as the CUDA runtime.
- *
- *  The following code listing demonstrates how to catch a \p system_error to recover
- *  from an error.
- *
- *  \code
- *
- *  #include <thrust/device_vector.h>
- *  #include <thrust/system.h>
- *  #include <thrust/sort.h>
- *
- *  void terminate_gracefully(void)
- *  {
- *    // application-specific termination code here
- *    ...
- *  }
- *
- *  int main(void)
- *  {
- *    try
- *    {
- *      thrust::device_vector<float> vec;
- *      thrust::sort(vec.begin(), vec.end());
- *    }
- *    catch(thrust::system_error e)
- *    {
- *      std::cerr << "Error inside sort: " << e.what() << std::endl;
- *      terminate_gracefully();
- *    }
- *
- *    return 0;
- *  }
- *
- *  \endcode
- *
- *  \note If an error represents an out-of-memory condition, implementations are encouraged
- *  to throw an exception object of type \p std::bad_alloc rather than \p system_error.
- */
-class system_error
-  : public std::runtime_error
-{
-  public:
-    // [19.5.5.2] Class system_error members
-    
-    /*! Constructs an object of class \p system_error.
-     *  \param ec The value returned by \p code().
-     *  \param what_arg A string to include in the result returned by \p what().
-     *  \post <tt>code() == ec</tt>.
-     *  \post <tt>std::string(what()).find(what_arg) != string::npos</tt>.
-     */
-    inline system_error(error_code ec, const std::string &what_arg);
-
-    /*! Constructs an object of class \p system_error.
-     *  \param ec The value returned by \p code().
-     *  \param what_arg A string to include in the result returned by \p what().
-     *  \post <tt>code() == ec</tt>.
-     *  \post <tt>std::string(what()).find(what_arg) != string::npos</tt>.
-     */
-    inline system_error(error_code ec, const char *what_arg);
-
-    /*! Constructs an object of class \p system_error.
-     *  \param ec The value returned by \p code().
-     *  \post <tt>code() == ec</tt>.
-     */
-    inline system_error(error_code ec);
-
-    /*! Constructs an object of class \p system_error.
-     *  \param ev The error value used to create an \p error_code.
-     *  \param ecat The \p error_category used to create an \p error_code.
-     *  \param what_arg A string to include in the result returned by \p what().
-     *  \post <tt>code() == error_code(ev, ecat)</tt>.
-     *  \post <tt>std::string(what()).find(what_arg) != string::npos</tt>.
-     */
-    inline system_error(int ev, const error_category &ecat, const std::string &what_arg);
-
-    /*! Constructs an object of class \p system_error.
-     *  \param ev The error value used to create an \p error_code.
-     *  \param ecat The \p error_category used to create an \p error_code.
-     *  \param what_arg A string to include in the result returned by \p what().
-     *  \post <tt>code() == error_code(ev, ecat)</tt>.
-     *  \post <tt>std::string(what()).find(what_arg) != string::npos</tt>.
-     */
-    inline system_error(int ev, const error_category &ecat, const char *what_arg);
-
-    /*! Constructs an object of class \p system_error.
-     *  \param ev The error value used to create an \p error_code.
-     *  \param ecat The \p error_category used to create an \p error_code.
-     *  \post <tt>code() == error_code(ev, ecat)</tt>.
-     */
-    inline system_error(int ev, const error_category &ecat);
-
-    /*! Destructor does not throw.
-     */
-    inline virtual ~system_error(void) throw () {};
-    
-    /*! Returns an object encoding the error.
-     *  \return <tt>ec</tt> or <tt>error_code(ev, ecat)</tt>, from the
-     *          constructor, as appropriate.
-     */
-    inline const error_code &code(void) const throw();
-
-    /*! Returns a human-readable string indicating the nature of the error.
-     *  \return a string incorporating <tt>code().message()</tt> and the
-     *          arguments supplied in the constructor.
-     */
-    inline const char *what(void) const throw();
-
-    /*! \cond
-     */
-  private:
-    error_code          m_error_code;
-    mutable std::string m_what;
-
-    /*! \endcond
-     */
-}; // end system_error
-
-} // end system
-
-/*! \} // end system_diagnostics
- */
-
-// import names into thrust::
-using system::system_error;
-
-} // end thrust
-
-#include <thrust/system/detail/system_error.inl>
-
diff --git a/compat/thrust/system/tbb/detail/adjacent_difference.h b/compat/thrust/system/tbb/detail/adjacent_difference.h
deleted file mode 100644
index 37c9adc2c9..0000000000
--- a/compat/thrust/system/tbb/detail/adjacent_difference.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/tbb/detail/execution_policy.h>
-#include <thrust/system/detail/generic/adjacent_difference.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace tbb
-{
-namespace detail
-{
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename BinaryFunction>
-  OutputIterator adjacent_difference(execution_policy<DerivedPolicy> &exec,
-                                     InputIterator first,
-                                     InputIterator last,
-                                     OutputIterator result,
-                                     BinaryFunction binary_op)
-{
-  // tbb prefers generic::adjacent_difference to cpp::adjacent_difference
-  return thrust::system::detail::generic::adjacent_difference(exec, first, last, result, binary_op);
-} // end adjacent_difference()
-
-} // end detail
-} // end tbb
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/tbb/detail/assign_value.h b/compat/thrust/system/tbb/detail/assign_value.h
deleted file mode 100644
index eda3b977b8..0000000000
--- a/compat/thrust/system/tbb/detail/assign_value.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system inherits assign_value
-#include <thrust/system/cpp/detail/assign_value.h>
-
diff --git a/compat/thrust/system/tbb/detail/binary_search.h b/compat/thrust/system/tbb/detail/binary_search.h
deleted file mode 100644
index 8dec989771..0000000000
--- a/compat/thrust/system/tbb/detail/binary_search.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system inherits binary_search
-#include <thrust/system/cpp/detail/binary_search.h>
-
diff --git a/compat/thrust/system/tbb/detail/copy.h b/compat/thrust/system/tbb/detail/copy.h
deleted file mode 100644
index 7604e6f4ad..0000000000
--- a/compat/thrust/system/tbb/detail/copy.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in ctbbliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/tbb/detail/execution_policy.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace tbb
-{
-namespace detail
-{
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator>
-OutputIterator copy(execution_policy<DerivedPolicy> &exec,
-                    InputIterator first,
-                    InputIterator last,
-                    OutputIterator result);
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename Size,
-         typename OutputIterator>
-OutputIterator copy_n(execution_policy<DerivedPolicy> &exec,
-                      InputIterator first,
-                      Size n,
-                      OutputIterator result);
-
-
-} // end namespace detail
-} // end namespace tbb
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/tbb/detail/copy.inl>
-
diff --git a/compat/thrust/system/tbb/detail/copy.inl b/compat/thrust/system/tbb/detail/copy.inl
deleted file mode 100644
index 6d354d0b18..0000000000
--- a/compat/thrust/system/tbb/detail/copy.inl
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in ctbbliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/tbb/detail/copy.h>
-#include <thrust/system/detail/generic/copy.h>
-#include <thrust/detail/type_traits/minimum_type.h>
-#include <thrust/system/cpp/detail/copy.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace tbb
-{
-namespace detail
-{
-namespace dispatch
-{
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator>
-  OutputIterator copy(execution_policy<DerivedPolicy> &exec,
-                      InputIterator first,
-                      InputIterator last,
-                      OutputIterator result,
-                      thrust::incrementable_traversal_tag)
-{
-  return thrust::system::cpp::detail::copy(exec, first, last, result);
-} // end copy()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator>
-  OutputIterator copy(execution_policy<DerivedPolicy> &exec,
-                      InputIterator first,
-                      InputIterator last,
-                      OutputIterator result,
-                      thrust::random_access_traversal_tag)
-{
-  return thrust::system::detail::generic::copy(exec, first, last, result);
-} // end copy()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename Size,
-         typename OutputIterator>
-  OutputIterator copy_n(execution_policy<DerivedPolicy> &exec,
-                        InputIterator first,
-                        Size n,
-                        OutputIterator result,
-                        thrust::incrementable_traversal_tag)
-{
-  return thrust::system::cpp::detail::copy_n(exec, first, n, result);
-} // end copy_n()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename Size,
-         typename OutputIterator>
-  OutputIterator copy_n(execution_policy<DerivedPolicy> &exec,
-                        InputIterator first,
-                        Size n,
-                        OutputIterator result,
-                        thrust::random_access_traversal_tag)
-{
-  return thrust::system::detail::generic::copy_n(exec, first, n, result);
-} // end copy_n()
-
-} // end dispatch
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator>
-OutputIterator copy(execution_policy<DerivedPolicy> &exec,
-                    InputIterator first,
-                    InputIterator last,
-                    OutputIterator result)
-{
-  typedef typename thrust::iterator_traversal<InputIterator>::type  traversal1;
-  typedef typename thrust::iterator_traversal<OutputIterator>::type traversal2;
-  
-  typedef typename thrust::detail::minimum_type<traversal1,traversal2>::type traversal;
-
-  // dispatch on minimum traversal
-  return thrust::system::tbb::detail::dispatch::copy(exec,first,last,result,traversal());
-} // end copy()
-
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename Size,
-         typename OutputIterator>
-OutputIterator copy_n(execution_policy<DerivedPolicy> &exec,
-                      InputIterator first,
-                      Size n,
-                      OutputIterator result)
-{
-  typedef typename thrust::iterator_traversal<InputIterator>::type  traversal1;
-  typedef typename thrust::iterator_traversal<OutputIterator>::type traversal2;
-  
-  typedef typename thrust::detail::minimum_type<traversal1,traversal2>::type traversal;
-
-  // dispatch on minimum traversal
-  return thrust::system::tbb::detail::dispatch::copy_n(exec,first,n,result,traversal());
-} // end copy_n()
-
-
-} // end namespace detail
-} // end namespace tbb
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/tbb/detail/copy_if.h b/compat/thrust/system/tbb/detail/copy_if.h
deleted file mode 100644
index ffbd4f8f46..0000000000
--- a/compat/thrust/system/tbb/detail/copy_if.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/tbb/detail/execution_policy.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace tbb
-{
-namespace detail
-{
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename Predicate>
-  OutputIterator copy_if(tag,
-                         InputIterator1 first,
-                         InputIterator1 last,
-                         InputIterator2 stencil,
-                         OutputIterator result,
-                         Predicate pred);
-
-
-} // end detail
-} // end tbb
-} // end system
-} // end thrust
-
-#include <thrust/system/tbb/detail/copy_if.inl>
-
diff --git a/compat/thrust/system/tbb/detail/copy_if.inl b/compat/thrust/system/tbb/detail/copy_if.inl
deleted file mode 100644
index 4353b3b2f7..0000000000
--- a/compat/thrust/system/tbb/detail/copy_if.inl
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/function.h>
-#include <thrust/system/tbb/detail/copy_if.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/distance.h>
-#include <tbb/blocked_range.h>
-#include <tbb/parallel_scan.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace tbb
-{
-namespace detail
-{
-namespace copy_if_detail
-{
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename Predicate,
-         typename Size>
-struct body
-{
-
-  InputIterator1 first;
-  InputIterator2 stencil;
-  OutputIterator result;
-  thrust::detail::host_function<Predicate,bool> pred;
-  Size sum;
-
-  body(InputIterator1 first, InputIterator2 stencil, OutputIterator result, Predicate pred)
-    : first(first), stencil(stencil), result(result), pred(pred), sum(0)
-  {}
-
-  body(body& b, ::tbb::split)
-    : first(b.first), stencil(b.stencil), result(b.result), pred(b.pred), sum(0)
-  {}
-
-  void operator()(const ::tbb::blocked_range<Size>& r, ::tbb::pre_scan_tag)
-  {
-    InputIterator2 iter = stencil + r.begin();
-
-    for (Size i = r.begin(); i != r.end(); ++i, ++iter)
-    {
-      if (pred(*iter))
-        ++sum;
-    }
-  }
-  
-  void operator()(const ::tbb::blocked_range<Size>& r, ::tbb::final_scan_tag)
-  {
-    InputIterator1  iter1 = first   + r.begin();
-    InputIterator2  iter2 = stencil + r.begin();
-    OutputIterator  iter3 = result  + sum;
-      
-    for (Size i = r.begin(); i != r.end(); ++i, ++iter1, ++iter2)
-    {
-      if (pred(*iter2))
-      {
-        *iter3 = *iter1;
-        ++sum;
-        ++iter3;
-      }
-    }
-  }
-
-  void reverse_join(body& b)
-  {
-    sum = b.sum + sum;
-  } 
-
-  void assign(body& b)
-  {
-    sum = b.sum;
-  } 
-}; // end body
-
-} // end copy_if_detail
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename Predicate>
-  OutputIterator copy_if(tag,
-                         InputIterator1 first,
-                         InputIterator1 last,
-                         InputIterator2 stencil,
-                         OutputIterator result,
-                         Predicate pred)
-{
-  typedef typename thrust::iterator_difference<InputIterator1>::type Size; 
-  typedef typename copy_if_detail::body<InputIterator1,InputIterator2,OutputIterator,Predicate,Size> Body;
-  
-  Size n = thrust::distance(first, last);
-
-  if (n != 0)
-  {
-    Body body(first, stencil, result, pred);
-    ::tbb::parallel_scan(::tbb::blocked_range<Size>(0,n), body);
-    thrust::advance(result, body.sum);
-  }
-
-  return result;
-} // end copy_if()
-
-} // end detail
-} // end tbb
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/tbb/detail/count.h b/compat/thrust/system/tbb/detail/count.h
deleted file mode 100644
index da31ee8700..0000000000
--- a/compat/thrust/system/tbb/detail/count.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system inherits count
-#include <thrust/system/cpp/detail/count.h>
-
diff --git a/compat/thrust/system/tbb/detail/equal.h b/compat/thrust/system/tbb/detail/equal.h
deleted file mode 100644
index 74e55183d9..0000000000
--- a/compat/thrust/system/tbb/detail/equal.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system inherits equal
-#include <thrust/system/cpp/detail/equal.h>
-
diff --git a/compat/thrust/system/tbb/detail/execution_policy.h b/compat/thrust/system/tbb/detail/execution_policy.h
deleted file mode 100644
index 167d1dc4ce..0000000000
--- a/compat/thrust/system/tbb/detail/execution_policy.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in ctbbliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cpp/detail/execution_policy.h>
-#include <thrust/iterator/detail/any_system_tag.h>
-#include <thrust/detail/type_traits.h>
-
-namespace thrust
-{
-namespace system
-{
-// put the canonical tag in the same ns as the backend's entry points
-namespace tbb
-{
-namespace detail
-{
-
-// this awkward sequence of definitions arise
-// from the desire both for tag to derive
-// from execution_policy and for execution_policy
-// to convert to tag (when execution_policy is not
-// an ancestor of tag)
-
-// forward declaration of tag
-struct tag;
-
-// forward declaration of execution_policy
-template<typename> struct execution_policy;
-
-// specialize execution_policy for tag
-template<>
-  struct execution_policy<tag>
-    : thrust::system::cpp::detail::execution_policy<tag>
-{};
-
-// tag's definition comes before the
-// generic definition of execution_policy
-struct tag : execution_policy<tag> {};
-
-// allow conversion to tag when it is not a successor
-template<typename Derived>
-  struct execution_policy
-    : thrust::system::cpp::detail::execution_policy<Derived>
-{
-  // allow conversion to tag
-  inline operator tag () const
-  {
-    return tag();
-  }
-};
-
-} // end detail
-
-// alias execution_policy and tag here
-using thrust::system::tbb::detail::execution_policy;
-using thrust::system::tbb::detail::tag;
-
-} // end tbb
-} // end system
-
-// alias items at top-level
-namespace tbb
-{
-
-using thrust::system::tbb::execution_policy;
-using thrust::system::tbb::tag;
-
-} // end tbb
-} // end thrust
-
diff --git a/compat/thrust/system/tbb/detail/extrema.h b/compat/thrust/system/tbb/detail/extrema.h
deleted file mode 100644
index 4715a8948b..0000000000
--- a/compat/thrust/system/tbb/detail/extrema.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in ctbbliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/tbb/detail/execution_policy.h>
-#include <thrust/system/detail/generic/extrema.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace tbb
-{
-namespace detail
-{
-
-template <typename DerivedPolicy, typename ForwardIterator, typename BinaryPredicate>
-ForwardIterator max_element(execution_policy<DerivedPolicy> &exec,
-                            ForwardIterator first, 
-                            ForwardIterator last,
-                            BinaryPredicate comp)
-{
-  // tbb prefers generic::max_element to cpp::max_element
-  return thrust::system::detail::generic::max_element(exec, first, last, comp);
-} // end max_element()
-
-template <typename DerivedPolicy, typename ForwardIterator, typename BinaryPredicate>
-ForwardIterator min_element(execution_policy<DerivedPolicy> &exec,
-                            ForwardIterator first, 
-                            ForwardIterator last,
-                            BinaryPredicate comp)
-{
-  // tbb prefers generic::min_element to cpp::min_element
-  return thrust::system::detail::generic::min_element(exec, first, last, comp);
-} // end min_element()
-
-template <typename DerivedPolicy, typename ForwardIterator, typename BinaryPredicate>
-thrust::pair<ForwardIterator,ForwardIterator> minmax_element(execution_policy<DerivedPolicy> &exec,
-                                                             ForwardIterator first, 
-                                                             ForwardIterator last,
-                                                             BinaryPredicate comp)
-{
-  // tbb prefers generic::minmax_element to cpp::minmax_element
-  return thrust::system::detail::generic::minmax_element(exec, first, last, comp);
-} // end minmax_element()
-
-} // end detail
-} // end tbb
-} // end system
-} // end thrust
-
-
diff --git a/compat/thrust/system/tbb/detail/fill.h b/compat/thrust/system/tbb/detail/fill.h
deleted file mode 100644
index 5219e1c7c5..0000000000
--- a/compat/thrust/system/tbb/detail/fill.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system inherits fill
-#include <thrust/system/cpp/detail/fill.h>
-
diff --git a/compat/thrust/system/tbb/detail/find.h b/compat/thrust/system/tbb/detail/find.h
deleted file mode 100644
index d351454c16..0000000000
--- a/compat/thrust/system/tbb/detail/find.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/detail/generic/find.h>
-#include <thrust/system/tbb/detail/execution_policy.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace tbb
-{
-namespace detail
-{
-
-template <typename DerivedPolicy, typename InputIterator, typename Predicate>
-InputIterator find_if(execution_policy<DerivedPolicy> &exec,
-                      InputIterator first,
-                      InputIterator last,
-                      Predicate pred)
-{
-  // tbb prefers generic::find_if to cpp::find_if
-  return thrust::system::detail::generic::find_if(exec, first, last, pred);
-}
-
-} // end namespace detail
-} // end namespace tbb
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/tbb/detail/for_each.h b/compat/thrust/system/tbb/detail/for_each.h
deleted file mode 100644
index 573bb819a2..0000000000
--- a/compat/thrust/system/tbb/detail/for_each.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in ctbbliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/tbb/detail/execution_policy.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace tbb
-{
-namespace detail
-{
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator,
-         typename UnaryFunction>
-  RandomAccessIterator for_each(execution_policy<DerivedPolicy> &exec,
-                                RandomAccessIterator first,
-                                RandomAccessIterator last,
-                                UnaryFunction f);
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator,
-         typename Size,
-         typename UnaryFunction>
-  RandomAccessIterator for_each_n(execution_policy<DerivedPolicy> &exec,
-                                  RandomAccessIterator first,
-                                  Size n,
-                                  UnaryFunction f);
-
-} // end namespace detail
-} // end namespace tbb
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/tbb/detail/for_each.inl>
-
diff --git a/compat/thrust/system/tbb/detail/for_each.inl b/compat/thrust/system/tbb/detail/for_each.inl
deleted file mode 100644
index b09c7be881..0000000000
--- a/compat/thrust/system/tbb/detail/for_each.inl
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in ctbbliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/static_assert.h>
-#include <thrust/distance.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/distance.h>
-#include <thrust/system/detail/internal/scalar/for_each.h>
-#include <tbb/blocked_range.h>
-#include <tbb/parallel_for.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace tbb
-{
-namespace detail
-{
-namespace for_each_detail
-{
-
-template<typename RandomAccessIterator,
-         typename Size,
-         typename UnaryFunction>
-  struct body
-{
-  RandomAccessIterator m_first;
-  UnaryFunction m_f;
-
-  body(RandomAccessIterator first, UnaryFunction f)
-    : m_first(first), m_f(f)
-  {}
-
-  void operator()(const ::tbb::blocked_range<Size> &r) const
-  {
-    // we assume that blocked_range specifies a contiguous range of integers
-    thrust::system::detail::internal::scalar::for_each_n(m_first + r.begin(), r.size(), m_f);
-  } // end operator()()
-}; // end body
-
-
-template<typename Size, typename RandomAccessIterator, typename UnaryFunction>
-  body<RandomAccessIterator,Size,UnaryFunction>
-    make_body(RandomAccessIterator first, UnaryFunction f)
-{
-  return body<RandomAccessIterator,Size,UnaryFunction>(first, f);
-} // end make_body()
-
-
-} // end for_each_detail
-
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator,
-         typename Size,
-         typename UnaryFunction>
-RandomAccessIterator for_each_n(execution_policy<DerivedPolicy> &,
-                                RandomAccessIterator first,
-                                Size n,
-                                UnaryFunction f)
-{
-  ::tbb::parallel_for(::tbb::blocked_range<Size>(0,n), for_each_detail::make_body<Size>(first,f));
-
-  // return the end of the range
-  return first + n;
-} // end for_each_n 
-
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator,
-         typename UnaryFunction>
-  RandomAccessIterator for_each(execution_policy<DerivedPolicy> &s,
-                                RandomAccessIterator first,
-                                RandomAccessIterator last,
-                                UnaryFunction f)
-{
-  return tbb::detail::for_each_n(s, first, thrust::distance(first,last), f);
-} // end for_each()
-
-
-} // end namespace detail
-} // end namespace tbb
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/tbb/detail/gather.h b/compat/thrust/system/tbb/detail/gather.h
deleted file mode 100644
index dfb7d7fc2d..0000000000
--- a/compat/thrust/system/tbb/detail/gather.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system inherits gather
-#include <thrust/system/cpp/detail/gather.h>
-
diff --git a/compat/thrust/system/tbb/detail/generate.h b/compat/thrust/system/tbb/detail/generate.h
deleted file mode 100644
index 0cb33b9336..0000000000
--- a/compat/thrust/system/tbb/detail/generate.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system inherits generate
-#include <thrust/system/cpp/detail/generate.h>
-
diff --git a/compat/thrust/system/tbb/detail/get_value.h b/compat/thrust/system/tbb/detail/get_value.h
deleted file mode 100644
index e376e65749..0000000000
--- a/compat/thrust/system/tbb/detail/get_value.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system inherits get_value
-#include <thrust/system/cpp/detail/get_value.h>
-
diff --git a/compat/thrust/system/tbb/detail/inner_product.h b/compat/thrust/system/tbb/detail/inner_product.h
deleted file mode 100644
index 351421a577..0000000000
--- a/compat/thrust/system/tbb/detail/inner_product.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system inherits inner_product
-#include <thrust/system/cpp/detail/inner_product.h>
-
diff --git a/compat/thrust/system/tbb/detail/iter_swap.h b/compat/thrust/system/tbb/detail/iter_swap.h
deleted file mode 100644
index 16176ec69b..0000000000
--- a/compat/thrust/system/tbb/detail/iter_swap.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system inherits iter_swap
-#include <thrust/system/cpp/detail/iter_swap.h>
-
diff --git a/compat/thrust/system/tbb/detail/logical.h b/compat/thrust/system/tbb/detail/logical.h
deleted file mode 100644
index b2a80de70f..0000000000
--- a/compat/thrust/system/tbb/detail/logical.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system inherits logical
-#include <thrust/system/cpp/detail/logical.h>
-
diff --git a/compat/thrust/system/tbb/detail/malloc_and_free.h b/compat/thrust/system/tbb/detail/malloc_and_free.h
deleted file mode 100644
index 811a552a4f..0000000000
--- a/compat/thrust/system/tbb/detail/malloc_and_free.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system inherits malloc and free
-#include <thrust/system/cpp/detail/malloc_and_free.h>
-
diff --git a/compat/thrust/system/tbb/detail/memory.inl b/compat/thrust/system/tbb/detail/memory.inl
deleted file mode 100644
index 420a8a14b4..0000000000
--- a/compat/thrust/system/tbb/detail/memory.inl
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in ctbbliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/system/cpp/detail/execution_policy.h>
-#include <thrust/system/tbb/memory.h>
-#include <thrust/system/cpp/memory.h>
-#include <limits>
-
-namespace thrust
-{
-namespace system
-{
-namespace tbb
-{
-
-
-template<typename T>
-  template<typename OtherT>
-    reference<T> &
-      reference<T>
-        ::operator=(const reference<OtherT> &other)
-{
-  return super_t::operator=(other);
-} // end reference::operator=()
-
-template<typename T>
-  reference<T> &
-    reference<T>
-      ::operator=(const value_type &x)
-{
-  return super_t::operator=(x);
-} // end reference::operator=()
-
-template<typename T>
-__host__ __device__
-void swap(reference<T> a, reference<T> b)
-{
-  a.swap(b);
-} // end swap()
-
-namespace detail
-{
-
-// XXX circular #inclusion problems cause the compiler to believe that cpp::malloc
-//     is not defined
-//     WAR the problem by using adl to call cpp::malloc, which requires it to depend
-//     on a template parameter
-template<typename Tag>
-  pointer<void> malloc_workaround(Tag t, std::size_t n)
-{
-  return pointer<void>(malloc(t, n));
-} // end malloc_workaround()
-
-// XXX circular #inclusion problems cause the compiler to believe that cpp::free
-//     is not defined
-//     WAR the problem by using adl to call cpp::free, which requires it to depend
-//     on a template parameter
-template<typename Tag>
-  void free_workaround(Tag t, pointer<void> ptr)
-{
-  free(t, ptr.get());
-} // end free_workaround()
-
-} // end detail
-
-inline pointer<void> malloc(std::size_t n)
-{
-  // XXX this is how we'd like to implement this function,
-  //     if not for circular #inclusion problems:
-  //
-  // return pointer<void>(thrust::system::cpp::malloc(n))
-  //
-  return detail::malloc_workaround(cpp::tag(), n);
-} // end malloc()
-
-template<typename T>
-pointer<T> malloc(std::size_t n)
-{
-  pointer<void> raw_ptr = thrust::system::tbb::malloc(sizeof(T) * n);
-  return pointer<T>(reinterpret_cast<T*>(raw_ptr.get()));
-} // end malloc()
-
-inline void free(pointer<void> ptr)
-{
-  // XXX this is how we'd like to implement this function,
-  //     if not for circular #inclusion problems:
-  //
-  // thrust::system::cpp::free(ptr)
-  //
-  detail::free_workaround(cpp::tag(), ptr);
-} // end free()
-
-} // end tbb
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/tbb/detail/merge.h b/compat/thrust/system/tbb/detail/merge.h
deleted file mode 100644
index 7b203ec327..0000000000
--- a/compat/thrust/system/tbb/detail/merge.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/tbb/detail/execution_policy.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace tbb
-{
-namespace detail
-{
-
-template<typename ExecutionPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename StrictWeakOrdering>
-OutputIterator merge(execution_policy<ExecutionPolicy> &exec,
-                     InputIterator1 first1,
-                     InputIterator1 last1,
-                     InputIterator2 first2,
-                     InputIterator2 last2,
-                     OutputIterator result,
-                     StrictWeakOrdering comp);
-
-template <typename ExecutionPolicy,
-          typename InputIterator1,
-          typename InputIterator2,
-          typename InputIterator3,
-          typename InputIterator4,
-          typename OutputIterator1,
-          typename OutputIterator2,
-          typename StrictWeakOrdering>
-thrust::pair<OutputIterator1,OutputIterator2>
-  merge_by_key(execution_policy<ExecutionPolicy> &exec,
-               InputIterator1 keys_first1,
-               InputIterator1 keys_last1,
-               InputIterator2 keys_first2,
-               InputIterator2 keys_last2,
-               InputIterator3 values_first3,
-               InputIterator4 values_first4,
-               OutputIterator1 keys_result,
-               OutputIterator2 values_result,
-               StrictWeakOrdering comp);
-
-} // end detail
-} // end tbb
-} // end system
-} // end thrust
-
-#include <thrust/system/tbb/detail/merge.inl>
-
diff --git a/compat/thrust/system/tbb/detail/merge.inl b/compat/thrust/system/tbb/detail/merge.inl
deleted file mode 100644
index cc902af85b..0000000000
--- a/compat/thrust/system/tbb/detail/merge.inl
+++ /dev/null
@@ -1,285 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/detail/temporary_array.h>
-#include <thrust/system/tbb/detail/execution_policy.h>
-#include <thrust/system/detail/internal/scalar/merge.h>
-#include <thrust/system/detail/internal/scalar/binary_search.h>
-#include <tbb/parallel_for.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace tbb
-{
-namespace detail
-{
-namespace merge_detail
-{
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename StrictWeakOrdering>
-struct range
-{
-  InputIterator1 first1, last1;
-  InputIterator2 first2, last2;
-  OutputIterator result;
-  StrictWeakOrdering comp;
-  size_t grain_size;
-
-  range(InputIterator1 first1, InputIterator1 last1,
-        InputIterator2 first2, InputIterator2 last2,
-        OutputIterator result,
-        StrictWeakOrdering comp,
-        size_t grain_size = 1024)
-    : first1(first1), last1(last1),
-      first2(first2), last2(last2),
-      result(result), comp(comp), grain_size(grain_size)
-  {}
-  
-  range(range& r, ::tbb::split)
-    : first1(r.first1), last1(r.last1),
-      first2(r.first2), last2(r.last2),
-      result(r.result), comp(r.comp), grain_size(r.grain_size)
-  {
-    // we can assume n1 and n2 are not both 0
-    size_t n1 = thrust::distance(first1, last1);
-    size_t n2 = thrust::distance(first2, last2);
-
-    InputIterator1 mid1 = first1;
-    InputIterator2 mid2 = first2;
-
-    if (n1 > n2)
-    {
-      mid1 += n1 / 2;
-      mid2 = thrust::system::detail::internal::scalar::lower_bound(first2, last2, raw_reference_cast(*mid1), comp);
-    }
-    else
-    {
-      mid2 += n2 / 2;
-      mid1 = thrust::system::detail::internal::scalar::upper_bound(first1, last1, raw_reference_cast(*mid2), comp);
-    }
-    
-    // set first range to [first1, mid1), [first2, mid2), result
-    r.last1 = mid1;
-    r.last2 = mid2;
-
-    // set second range to [mid1, last1), [mid2, last2), result + (mid1 - first1) + (mid2 - first2)
-    first1 = mid1;
-    first2 = mid2;
-    result += thrust::distance(r.first1, mid1) + thrust::distance(r.first2, mid2);
-  }
-
-  bool empty(void) const
-  {
-    return (first1 == last1) && (first2 == last2);
-  }
-
-  bool is_divisible(void) const
-  {
-    return static_cast<size_t>(thrust::distance(first1, last1) + thrust::distance(first2, last2)) > grain_size;
-  }
-};
-
-struct body
-{
-  template <typename Range>
-  void operator()(Range& r) const
-  {
-    thrust::system::detail::internal::scalar::merge
-      (r.first1, r.last1,
-       r.first2, r.last2,
-       r.result,
-       r.comp);
-  }
-};
-
-} // end namespace merge_detail
-
-namespace merge_by_key_detail
-{
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename InputIterator4,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename StrictWeakOrdering>
-struct range
-{
-  InputIterator1 keys_first1, keys_last1;
-  InputIterator2 keys_first2, keys_last2;
-  InputIterator3 values_first1;
-  InputIterator4 values_first2;
-  OutputIterator1 keys_result;
-  OutputIterator2 values_result;
-  StrictWeakOrdering comp;
-  size_t grain_size;
-
-  range(InputIterator1 keys_first1, InputIterator1 keys_last1,
-        InputIterator2 keys_first2, InputIterator2 keys_last2,
-        InputIterator3 values_first1,
-        InputIterator4 values_first2,
-        OutputIterator1 keys_result,
-        OutputIterator2 values_result,
-        StrictWeakOrdering comp,
-        size_t grain_size = 1024)
-    : keys_first1(keys_first1), keys_last1(keys_last1),
-      keys_first2(keys_first2), keys_last2(keys_last2),
-      values_first1(values_first1),
-      values_first2(values_first2),
-      keys_result(keys_result), values_result(values_result),
-      comp(comp), grain_size(grain_size)
-  {}
-  
-  range(range& r, ::tbb::split)
-    : keys_first1(r.keys_first1), keys_last1(r.keys_last1),
-      keys_first2(r.keys_first2), keys_last2(r.keys_last2),
-      values_first1(r.values_first1),
-      values_first2(r.values_first2),
-      keys_result(r.keys_result), values_result(r.values_result),
-      comp(r.comp), grain_size(r.grain_size)
-  {
-    // we can assume n1 and n2 are not both 0
-    size_t n1 = thrust::distance(keys_first1, keys_last1);
-    size_t n2 = thrust::distance(keys_first2, keys_last2);
-
-    InputIterator1 mid1 = keys_first1;
-    InputIterator2 mid2 = keys_first2;
-
-    if (n1 > n2)
-    {
-      mid1 += n1 / 2;
-      mid2 = thrust::system::detail::internal::scalar::lower_bound(keys_first2, keys_last2, raw_reference_cast(*mid1), comp);
-    }
-    else
-    {
-      mid2 += n2 / 2;
-      mid1 = thrust::system::detail::internal::scalar::upper_bound(keys_first1, keys_last1, raw_reference_cast(*mid2), comp);
-    }
-    
-    // set first range to [keys_first1, mid1), [keys_first2, mid2), keys_result, values_result
-    r.keys_last1 = mid1;
-    r.keys_last2 = mid2;
-
-    // set second range to [mid1, keys_last1), [mid2, keys_last2), keys_result + (mid1 - keys_first1) + (mid2 - keys_first2), values_result + (mid1 - keys_first1) + (mid2 - keys_first2) 
-    keys_first1 = mid1;
-    keys_first2 = mid2;
-    values_first1 += thrust::distance(r.keys_first1, mid1);
-    values_first2 += thrust::distance(r.keys_first2, mid2);
-    keys_result += thrust::distance(r.keys_first1, mid1) + thrust::distance(r.keys_first2, mid2);
-    values_result += thrust::distance(r.keys_first1, mid1) + thrust::distance(r.keys_first2, mid2);
-  }
-
-  bool empty(void) const
-  {
-    return (keys_first1 == keys_last1) && (keys_first2 == keys_last2);
-  }
-
-  bool is_divisible(void) const
-  {
-    return static_cast<size_t>(thrust::distance(keys_first1, keys_last1) + thrust::distance(keys_first2, keys_last2)) > grain_size;
-  }
-};
-
-struct body
-{
-  template <typename Range>
-  void operator()(Range& r) const
-  {
-    thrust::system::detail::internal::scalar::merge_by_key
-      (r.keys_first1, r.keys_last1,
-       r.keys_first2, r.keys_last2,
-       r.values_first1,
-       r.values_first2,
-       r.keys_result,
-       r.values_result,
-       r.comp);
-  }
-};
-
-} // end namespace merge_by_key_detail
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename StrictWeakOrdering>
-OutputIterator merge(execution_policy<DerivedPolicy> &exec,
-                     InputIterator1 first1,
-                     InputIterator1 last1,
-                     InputIterator2 first2,
-                     InputIterator2 last2,
-                     OutputIterator result,
-                     StrictWeakOrdering comp)
-{
-  typedef typename merge_detail::range<InputIterator1,InputIterator2,OutputIterator,StrictWeakOrdering> Range;
-  typedef          merge_detail::body                                                                   Body;
-  Range range(first1, last1, first2, last2, result, comp);
-  Body  body;
-
-  ::tbb::parallel_for(range, body);
-
-  thrust::advance(result, thrust::distance(first1, last1) + thrust::distance(first2, last2));
-
-  return result;
-} // end merge()
-
-template <typename DerivedPolicy,
-          typename InputIterator1,
-          typename InputIterator2,
-          typename InputIterator3,
-          typename InputIterator4,
-          typename OutputIterator1,
-          typename OutputIterator2,
-          typename StrictWeakOrdering>
-thrust::pair<OutputIterator1,OutputIterator2>
-  merge_by_key(execution_policy<DerivedPolicy> &exec,
-               InputIterator1 keys_first1,
-               InputIterator1 keys_last1,
-               InputIterator2 keys_first2,
-               InputIterator2 keys_last2,
-               InputIterator3 values_first3,
-               InputIterator4 values_first4,
-               OutputIterator1 keys_result,
-               OutputIterator2 values_result,
-               StrictWeakOrdering comp)
-{
-  typedef typename merge_by_key_detail::range<InputIterator1,InputIterator2,InputIterator3,InputIterator4,OutputIterator1,OutputIterator2,StrictWeakOrdering> Range;
-  typedef          merge_by_key_detail::body                                                                                                                  Body;
-
-  Range range(keys_first1, keys_last1, keys_first2, keys_last2, values_first3, values_first4, keys_result, values_result, comp);
-  Body  body;
-
-  ::tbb::parallel_for(range, body);
-
-  thrust::advance(keys_result,   thrust::distance(keys_first1, keys_last1) + thrust::distance(keys_first2, keys_last2));
-  thrust::advance(values_result, thrust::distance(keys_first1, keys_last1) + thrust::distance(keys_first2, keys_last2));
-
-  return thrust::make_pair(keys_result,values_result);
-}
-
-} // end namespace detail
-} // end namespace tbb
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/tbb/detail/mismatch.h b/compat/thrust/system/tbb/detail/mismatch.h
deleted file mode 100644
index 03980cfcd5..0000000000
--- a/compat/thrust/system/tbb/detail/mismatch.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system inherits mismatch
-#include <thrust/system/cpp/detail/mismatch.h>
-
diff --git a/compat/thrust/system/tbb/detail/par.h b/compat/thrust/system/tbb/detail/par.h
deleted file mode 100644
index 74801ab914..0000000000
--- a/compat/thrust/system/tbb/detail/par.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/tbb/detail/execution_policy.h>
-#include <thrust/detail/execute_with_allocator.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace tbb
-{
-namespace detail
-{
-
-
-struct par_t : thrust::system::tbb::detail::execution_policy<par_t>
-{
-  par_t() : thrust::system::tbb::detail::execution_policy<par_t>() {}
-
-  template<typename Allocator>
-    thrust::detail::execute_with_allocator<Allocator, thrust::system::tbb::detail::execution_policy>
-      operator()(Allocator &alloc) const
-  {
-    return thrust::detail::execute_with_allocator<Allocator, thrust::system::tbb::detail::execution_policy>(alloc);
-  }
-};
-
-
-} // end detail
-
-
-static const detail::par_t par;
-
-
-} // end tbb
-} // end system
-
-
-// alias par here
-namespace tbb
-{
-
-
-using thrust::system::tbb::par;
-
-
-} // end tbb
-} // end thrust
-
diff --git a/compat/thrust/system/tbb/detail/partition.h b/compat/thrust/system/tbb/detail/partition.h
deleted file mode 100644
index af37121888..0000000000
--- a/compat/thrust/system/tbb/detail/partition.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/tbb/detail/execution_policy.h>
-#include <thrust/pair.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace tbb
-{
-namespace detail
-{
-
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename Predicate>
-  ForwardIterator stable_partition(execution_policy<DerivedPolicy> &exec,
-                                   ForwardIterator first,
-                                   ForwardIterator last,
-                                   Predicate pred);
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename InputIterator,
-         typename Predicate>
-  ForwardIterator stable_partition(execution_policy<DerivedPolicy> &exec,
-                                   ForwardIterator first,
-                                   ForwardIterator last,
-                                   InputIterator stencil,
-                                   Predicate pred);
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename Predicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    stable_partition_copy(execution_policy<DerivedPolicy> &exec,
-                          InputIterator first,
-                          InputIterator last,
-                          OutputIterator1 out_true,
-                          OutputIterator2 out_false,
-                          Predicate pred);
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename Predicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    stable_partition_copy(execution_policy<DerivedPolicy> &exec,
-                          InputIterator1 first,
-                          InputIterator1 last,
-                          InputIterator2 stencil,
-                          OutputIterator1 out_true,
-                          OutputIterator2 out_false,
-                          Predicate pred);
-
-
-} // end namespace detail
-} // end namespace tbb
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/tbb/detail/partition.inl>
-
diff --git a/compat/thrust/system/tbb/detail/partition.inl b/compat/thrust/system/tbb/detail/partition.inl
deleted file mode 100644
index 1e421e10f3..0000000000
--- a/compat/thrust/system/tbb/detail/partition.inl
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/tbb/detail/partition.h>
-#include <thrust/system/detail/generic/partition.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace tbb
-{
-namespace detail
-{
-
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename Predicate>
-  ForwardIterator stable_partition(execution_policy<DerivedPolicy> &exec,
-                                   ForwardIterator first,
-                                   ForwardIterator last,
-                                   Predicate pred)
-{
-  // tbb prefers generic::stable_partition to cpp::stable_partition
-  return thrust::system::detail::generic::stable_partition(exec, first, last, pred);
-} // end stable_partition()
-
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename InputIterator,
-         typename Predicate>
-  ForwardIterator stable_partition(execution_policy<DerivedPolicy> &exec,
-                                   ForwardIterator first,
-                                   ForwardIterator last,
-                                   InputIterator stencil,
-                                   Predicate pred)
-{
-  // tbb prefers generic::stable_partition to cpp::stable_partition
-  return thrust::system::detail::generic::stable_partition(exec, first, last, stencil, pred);
-} // end stable_partition()
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename Predicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    stable_partition_copy(execution_policy<DerivedPolicy> &exec,
-                          InputIterator first,
-                          InputIterator last,
-                          OutputIterator1 out_true,
-                          OutputIterator2 out_false,
-                          Predicate pred)
-{
-  // tbb prefers generic::stable_partition_copy to cpp::stable_partition_copy
-  return thrust::system::detail::generic::stable_partition_copy(exec, first, last, out_true, out_false, pred);
-} // end stable_partition_copy()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename Predicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    stable_partition_copy(execution_policy<DerivedPolicy> &exec,
-                          InputIterator1 first,
-                          InputIterator1 last,
-                          InputIterator2 stencil,
-                          OutputIterator1 out_true,
-                          OutputIterator2 out_false,
-                          Predicate pred)
-{
-  // tbb prefers generic::stable_partition_copy to cpp::stable_partition_copy
-  return thrust::system::detail::generic::stable_partition_copy(exec, first, last, stencil, out_true, out_false, pred);
-} // end stable_partition_copy()
-
-
-} // end namespace detail
-} // end namespace tbb
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/tbb/detail/reduce.h b/compat/thrust/system/tbb/detail/reduce.h
deleted file mode 100644
index 83a7cc3214..0000000000
--- a/compat/thrust/system/tbb/detail/reduce.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file reduce.h
- *  \brief TBB implementation of reduce.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/tbb/detail/execution_policy.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace tbb
-{
-namespace detail
-{
-
-
-template<typename DerivedPolicy,
-         typename InputIterator, 
-         typename OutputType,
-         typename BinaryFunction>
-  OutputType reduce(execution_policy<DerivedPolicy> &exec,
-                    InputIterator begin,
-                    InputIterator end,
-                    OutputType init,
-                    BinaryFunction binary_op);
-
-
-} // end namespace detail
-} // end namespace tbb
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/tbb/detail/reduce.inl>
-
diff --git a/compat/thrust/system/tbb/detail/reduce.inl b/compat/thrust/system/tbb/detail/reduce.inl
deleted file mode 100644
index c249852769..0000000000
--- a/compat/thrust/system/tbb/detail/reduce.inl
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/function.h>
-#include <thrust/detail/static_assert.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/distance.h>
-#include <thrust/reduce.h>
-#include <tbb/blocked_range.h>
-#include <tbb/parallel_reduce.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace tbb
-{
-namespace detail
-{
-namespace reduce_detail
-{
-
-template<typename RandomAccessIterator,
-         typename OutputType,
-         typename BinaryFunction>
-struct body
-{
-  RandomAccessIterator first;
-  OutputType sum;
-  bool first_call;  // TBB can invoke operator() multiple times on the same body
-  thrust::detail::host_function<BinaryFunction,OutputType> binary_op;
-
-  // note: we only initalize sum with init to avoid calling OutputType's default constructor
-  body(RandomAccessIterator first, OutputType init, BinaryFunction binary_op)
-    : first(first), sum(init), first_call(true), binary_op(binary_op)
-  {}
-
-  // note: we only initalize sum with b.sum to avoid calling OutputType's default constructor
-  body(body& b, ::tbb::split)
-    : first(b.first), sum(b.sum), first_call(true), binary_op(b.binary_op)
-  {}
-
-  template <typename Size>
-  void operator()(const ::tbb::blocked_range<Size> &r)
-  {
-    // we assume that blocked_range specifies a contiguous range of integers
-    
-    if (r.empty()) return; // nothing to do
-
-    RandomAccessIterator iter = first + r.begin();
-
-    OutputType temp = thrust::raw_reference_cast(*iter);
-
-    ++iter;
-
-    for (Size i = r.begin() + 1; i != r.end(); ++i, ++iter)
-      temp = binary_op(temp, *iter);
-
-
-    if (first_call)
-    {
-      // first time body has been invoked
-      first_call = false;
-      sum = temp;
-    }
-    else
-    {
-      // body has been previously invoked, accumulate temp into sum
-      sum = binary_op(sum, temp);
-    }
-  } // end operator()()
-  
-  void join(body& b)
-  {
-    sum = binary_op(sum, b.sum);
-  }
-}; // end body
-
-} // end reduce_detail
-
-
-template<typename DerivedPolicy,
-         typename InputIterator, 
-         typename OutputType,
-         typename BinaryFunction>
-  OutputType reduce(execution_policy<DerivedPolicy> &exec,
-                    InputIterator begin,
-                    InputIterator end,
-                    OutputType init,
-                    BinaryFunction binary_op)
-{
-  typedef typename thrust::iterator_difference<InputIterator>::type Size; 
-
-  Size n = thrust::distance(begin, end);
-
-  if (n == 0)
-  {
-    return init;
-  }
-  else
-  {
-    typedef typename reduce_detail::body<InputIterator,OutputType,BinaryFunction> Body;
-    Body reduce_body(begin, init, binary_op);
-    ::tbb::parallel_reduce(::tbb::blocked_range<Size>(0,n), reduce_body);
-    return binary_op(init, reduce_body.sum);
-  }
-}
-
-
-} // end namespace detail
-} // end namespace tbb
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/tbb/detail/reduce_by_key.h b/compat/thrust/system/tbb/detail/reduce_by_key.h
deleted file mode 100644
index 0149a763f7..0000000000
--- a/compat/thrust/system/tbb/detail/reduce_by_key.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/tbb/detail/execution_policy.h>
-#include <thrust/pair.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace tbb
-{
-namespace detail
-{
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename BinaryPredicate,
-         typename BinaryFunction>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    reduce_by_key(execution_policy<DerivedPolicy> &exec,
-                  InputIterator1 keys_first, 
-                  InputIterator1 keys_last,
-                  InputIterator2 values_first,
-                  OutputIterator1 keys_output,
-                  OutputIterator2 values_output,
-                  BinaryPredicate binary_pred,
-                  BinaryFunction binary_op);
-
-
-} // end namespace detail
-} // end namespace tbb
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/tbb/detail/reduce_by_key.inl>
-
diff --git a/compat/thrust/system/tbb/detail/reduce_by_key.inl b/compat/thrust/system/tbb/detail/reduce_by_key.inl
deleted file mode 100644
index 10d2d8b4a8..0000000000
--- a/compat/thrust/system/tbb/detail/reduce_by_key.inl
+++ /dev/null
@@ -1,344 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/tbb/detail/reduce_by_key.h>
-#include <thrust/iterator/reverse_iterator.h>
-#include <thrust/system/cpp/execution_policy.h>
-#include <thrust/system/tbb/detail/execution_policy.h>
-#include <thrust/system/tbb/detail/reduce_intervals.h>
-#include <thrust/detail/minmax.h>
-#include <thrust/detail/temporary_array.h>
-#include <thrust/detail/range/tail_flags.h>
-#include <tbb/blocked_range.h>
-#include <tbb/parallel_for.h>
-#include <tbb/tbb_thread.h>
-#include <cassert>
-
-
-namespace thrust
-{
-namespace system
-{
-namespace tbb
-{
-namespace detail
-{
-namespace reduce_by_key_detail
-{
-
-
-template<typename L, typename R>
-  inline L divide_ri(const L x, const R y)
-{
-  return (x + (y - 1)) / y;
-}
-
-
-template<typename InputIterator, typename BinaryFunction, typename OutputIterator = void>
-  struct partial_sum_type
-    : thrust::detail::eval_if<
-        thrust::detail::has_result_type<BinaryFunction>::value,
-        thrust::detail::result_type<BinaryFunction>,
-        thrust::detail::eval_if<
-          thrust::detail::is_output_iterator<OutputIterator>::value,
-          thrust::iterator_value<InputIterator>,
-          thrust::iterator_value<OutputIterator>
-        >
-      >
-{};
-
-
-template<typename InputIterator, typename BinaryFunction>
-  struct partial_sum_type<InputIterator,BinaryFunction,void>
-    : thrust::detail::eval_if<
-        thrust::detail::has_result_type<BinaryFunction>::value,
-        thrust::detail::result_type<BinaryFunction>,
-        thrust::iterator_value<InputIterator>
-      >
-{};
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename BinaryPredicate,
-         typename BinaryFunction>
-  thrust::pair<
-    InputIterator1,
-    thrust::pair<
-      typename InputIterator1::value_type,
-      typename partial_sum_type<InputIterator2,BinaryFunction>::type
-    >
-  >
-    reduce_last_segment_backward(InputIterator1 keys_first,
-                                 InputIterator1 keys_last,
-                                 InputIterator2 values_first,
-                                 BinaryPredicate binary_pred,
-                                 BinaryFunction binary_op)
-{
-  typename thrust::iterator_difference<InputIterator1>::type n = keys_last - keys_first;
-
-  // reverse the ranges and consume from the end
-  thrust::reverse_iterator<InputIterator1> keys_first_r(keys_last);
-  thrust::reverse_iterator<InputIterator1> keys_last_r(keys_first);
-  thrust::reverse_iterator<InputIterator2> values_first_r(values_first + n);
-
-  typename InputIterator1::value_type result_key = *keys_first_r;
-  typename partial_sum_type<InputIterator2,BinaryFunction>::type result_value = *values_first_r;
-
-  // consume the entirety of the first key's sequence
-  for(++keys_first_r, ++values_first_r;
-      (keys_first_r != keys_last_r) && binary_pred(*keys_first_r, result_key);
-      ++keys_first_r, ++values_first_r)
-  {
-    result_value = binary_op(result_value, *values_first_r);
-  }
-
-  return thrust::make_pair(keys_first_r.base(), thrust::make_pair(result_key, result_value));
-}
-
-
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename BinaryPredicate,
-         typename BinaryFunction>
-  thrust::tuple<
-    OutputIterator1,
-    OutputIterator2,
-    typename InputIterator1::value_type,
-    typename partial_sum_type<InputIterator2,BinaryFunction>::type
-  >
-    reduce_by_key_with_carry(InputIterator1 keys_first, 
-                             InputIterator1 keys_last,
-                             InputIterator2 values_first,
-                             OutputIterator1 keys_output,
-                             OutputIterator2 values_output,
-                             BinaryPredicate binary_pred,
-                             BinaryFunction binary_op)
-{
-  // first, consume the last sequence to produce the carry
-  // XXX is there an elegant way to pose this such that we don't need to default construct carry?
-  thrust::pair<
-    typename InputIterator1::value_type,
-    typename partial_sum_type<InputIterator2,BinaryFunction>::type
-  > carry;
-
-  thrust::tie(keys_last, carry) = reduce_last_segment_backward(keys_first, keys_last, values_first, binary_pred, binary_op);
-
-  // finish with sequential reduce_by_key
-  thrust::cpp::tag seq;
-  thrust::tie(keys_output, values_output) =
-    thrust::reduce_by_key(seq, keys_first, keys_last, values_first, keys_output, values_output, binary_pred, binary_op);
-  
-  return thrust::make_tuple(keys_output, values_output, carry.first, carry.second);
-}
-
-
-template<typename Iterator>
-  bool interval_has_carry(size_t interval_idx, size_t interval_size, size_t num_intervals, Iterator tail_flags)
-{
-  // to discover whether the interval has a carry, look at the tail_flag corresponding to its last element 
-  // the final interval never has a carry by definition
-  return (interval_idx + 1 < num_intervals) ? !tail_flags[(interval_idx + 1) * interval_size - 1] : false;
-}
-
-
-template<typename Iterator1, typename Iterator2, typename Iterator3, typename Iterator4, typename Iterator5, typename Iterator6, typename BinaryPredicate, typename BinaryFunction>
-  struct serial_reduce_by_key_body
-{
-  typedef typename thrust::iterator_difference<Iterator1>::type size_type;
-
-  Iterator1 keys_first;
-  Iterator2 values_first;
-  Iterator3 result_offset;
-  Iterator4 keys_result;
-  Iterator5 values_result;
-  Iterator6 carry_result;
-
-  size_type n;
-  size_type interval_size;
-  size_type num_intervals;
-
-  BinaryPredicate binary_pred;
-  BinaryFunction binary_op;
-
-  serial_reduce_by_key_body(Iterator1 keys_first, Iterator2 values_first, Iterator3 result_offset, Iterator4 keys_result, Iterator5 values_result, Iterator6 carry_result, size_type n, size_type interval_size, size_type num_intervals, BinaryPredicate binary_pred, BinaryFunction binary_op)
-    : keys_first(keys_first), values_first(values_first),
-      result_offset(result_offset),
-      keys_result(keys_result),
-      values_result(values_result),
-      carry_result(carry_result),
-      n(n),
-      interval_size(interval_size),
-      num_intervals(num_intervals),
-      binary_pred(binary_pred),
-      binary_op(binary_op)
-  {}
-
-  void operator()(const ::tbb::blocked_range<size_type> &r) const
-  {
-    assert(r.size() == 1);
-
-    const size_type interval_idx = r.begin();
-
-    const size_type offset_to_first = interval_size * interval_idx;
-    const size_type offset_to_last = thrust::min(n, offset_to_first + interval_size);
-
-    Iterator1 my_keys_first     = keys_first    + offset_to_first;
-    Iterator1 my_keys_last      = keys_first    + offset_to_last;
-    Iterator2 my_values_first   = values_first  + offset_to_first;
-    Iterator3 my_result_offset  = result_offset + interval_idx;
-    Iterator4 my_keys_result    = keys_result   + *my_result_offset;
-    Iterator5 my_values_result  = values_result + *my_result_offset;
-    Iterator6 my_carry_result   = carry_result  + interval_idx;
-
-    // consume the rest of the interval with reduce_by_key
-    typedef typename thrust::iterator_value<Iterator1>::type key_type;
-    typedef typename partial_sum_type<Iterator2,BinaryFunction>::type value_type;
-
-    // XXX is there a way to pose this so that we don't require default construction of carry?
-    thrust::pair<key_type, value_type> carry;
-
-    thrust::tie(my_keys_result, my_values_result, carry.first, carry.second) =
-      reduce_by_key_with_carry(my_keys_first,
-                               my_keys_last,
-                               my_values_first,
-                               my_keys_result,
-                               my_values_result,
-                               binary_pred,
-                               binary_op);
-
-    // store to carry only when we actually have a carry
-    // store to my_keys_result & my_values_result otherwise
-    
-    // create tail_flags so we can check for a carry
-    thrust::detail::tail_flags<Iterator1,BinaryPredicate> flags = thrust::detail::make_tail_flags(keys_first, keys_first + n, binary_pred);
-
-    if(interval_has_carry(interval_idx, interval_size, num_intervals, flags.begin()))
-    {
-      // we can ignore the carry's key
-      // XXX because the carry result is uninitialized, we should copy construct
-      *my_carry_result = carry.second;
-    }
-    else
-    {
-      *my_keys_result = carry.first;
-      *my_values_result = carry.second;
-    }
-  }
-};
-
-
-template<typename Iterator1, typename Iterator2, typename Iterator3, typename Iterator4, typename Iterator5, typename Iterator6, typename BinaryPredicate, typename BinaryFunction>
-  serial_reduce_by_key_body<Iterator1,Iterator2,Iterator3,Iterator4,Iterator5,Iterator6,BinaryPredicate,BinaryFunction>
-    make_serial_reduce_by_key_body(Iterator1 keys_first, Iterator2 values_first, Iterator3 result_offset, Iterator4 keys_result, Iterator5 values_result, Iterator6 carry_result, typename thrust::iterator_difference<Iterator1>::type n, size_t interval_size, size_t num_intervals, BinaryPredicate binary_pred, BinaryFunction binary_op)
-{
-  return serial_reduce_by_key_body<Iterator1,Iterator2,Iterator3,Iterator4,Iterator5,Iterator6,BinaryPredicate,BinaryFunction>(keys_first, values_first, result_offset, keys_result, values_result, carry_result, n, interval_size, num_intervals, binary_pred, binary_op);
-}
-
-
-} // end reduce_by_key_detail
-
-
-template<typename DerivedPolicy, typename Iterator1, typename Iterator2, typename Iterator3, typename Iterator4, typename BinaryPredicate, typename BinaryFunction>
-  thrust::pair<Iterator3,Iterator4>
-    reduce_by_key(thrust::tbb::execution_policy<DerivedPolicy> &exec,
-                  Iterator1 keys_first, Iterator1 keys_last, 
-                  Iterator2 values_first,
-                  Iterator3 keys_result,
-                  Iterator4 values_result,
-                  BinaryPredicate binary_pred,
-                  BinaryFunction binary_op)
-{
-
-  typedef typename thrust::iterator_difference<Iterator1>::type difference_type;
-  difference_type n = keys_last - keys_first;
-  if(n == 0) return thrust::make_pair(keys_result, values_result);
-
-  // XXX this value is a tuning opportunity
-  const difference_type parallelism_threshold = 10000;
-
-  if(n < parallelism_threshold)
-  {
-    // don't bother parallelizing for small n
-    thrust::cpp::tag seq;
-    return thrust::reduce_by_key(seq, keys_first, keys_last, values_first, keys_result, values_result, binary_pred, binary_op);
-  }
-
-  // count the number of processors
-  const unsigned int p = thrust::max<unsigned int>(1u, ::tbb::tbb_thread::hardware_concurrency());
-
-  // generate O(P) intervals of sequential work
-  // XXX oversubscribing is a tuning opportunity
-  const unsigned int subscription_rate = 1;
-  difference_type interval_size = thrust::min<difference_type>(parallelism_threshold, thrust::max<difference_type>(n, n / (subscription_rate * p)));
-  difference_type num_intervals = reduce_by_key_detail::divide_ri(n, interval_size);
-
-  // decompose the input into intervals of size N / num_intervals
-  // add one extra element to this vector to store the size of the entire result
-  thrust::detail::temporary_array<difference_type, DerivedPolicy> interval_output_offsets(0, exec, num_intervals + 1);
-
-  // first count the number of tail flags in each interval
-  thrust::detail::tail_flags<Iterator1,BinaryPredicate> tail_flags = thrust::detail::make_tail_flags(keys_first, keys_last, binary_pred);
-  thrust::system::tbb::detail::reduce_intervals(exec, tail_flags.begin(), tail_flags.end(), interval_size, interval_output_offsets.begin() + 1, thrust::plus<size_t>());
-  interval_output_offsets[0] = 0;
-
-  // scan the counts to get each body's output offset
-  thrust::cpp::tag seq;
-  thrust::inclusive_scan(seq,
-                         interval_output_offsets.begin() + 1, interval_output_offsets.end(), 
-                         interval_output_offsets.begin() + 1);
-
-  // do a reduce_by_key serially in each thread
-  // the final interval never has a carry by definition, so don't reserve space for it
-  typedef typename reduce_by_key_detail::partial_sum_type<Iterator2,BinaryFunction>::type carry_type;
-  thrust::detail::temporary_array<carry_type, DerivedPolicy> carries(0, exec, num_intervals - 1);
-
-  // force grainsize == 1 with simple_partioner()
-  ::tbb::parallel_for(::tbb::blocked_range<difference_type>(0, num_intervals, 1),
-    reduce_by_key_detail::make_serial_reduce_by_key_body(keys_first, values_first, interval_output_offsets.begin(), keys_result, values_result, carries.begin(), n, interval_size, num_intervals, binary_pred, binary_op),
-    ::tbb::simple_partitioner());
-
-  difference_type size_of_result = interval_output_offsets[num_intervals];
-
-  // sequentially accumulate the carries
-  // note that the last interval does not have a carry
-  // XXX find a way to express this loop via a sequential algorithm, perhaps reduce_by_key
-  for(typename thrust::detail::temporary_array<carry_type,DerivedPolicy>::size_type i = 0; i < carries.size(); ++i)
-  {
-    // if our interval has a carry, then we need to sum the carry to the next interval's output offset
-    // if it does not have a carry, then we need to ignore carry_value[i]
-    if(reduce_by_key_detail::interval_has_carry(i, interval_size, num_intervals, tail_flags.begin()))
-    {
-      difference_type output_idx = interval_output_offsets[i+1];
-
-      values_result[output_idx] = binary_op(values_result[output_idx], carries[i]);
-    }
-  }
-
-  return thrust::make_pair(keys_result + size_of_result, values_result + size_of_result);
-}
-
-
-} // end detail
-} // end tbb
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/tbb/detail/reduce_intervals.h b/compat/thrust/system/tbb/detail/reduce_intervals.h
deleted file mode 100644
index 0647ffd464..0000000000
--- a/compat/thrust/system/tbb/detail/reduce_intervals.h
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/tbb/detail/execution_policy.h>
-
-#include <tbb/parallel_for.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/detail/minmax.h>
-#include <thrust/system/cpp/memory.h>
-#include <thrust/reduce.h>
-#include <cassert>
-
-namespace thrust
-{
-namespace system
-{
-namespace tbb
-{
-namespace detail
-{
-namespace reduce_intervals_detail
-{
-
-
-template<typename L, typename R>
-  inline L divide_ri(const L x, const R y)
-{
-  return (x + (y - 1)) / y;
-}
-
-
-template<typename RandomAccessIterator1, typename RandomAccessIterator2, typename Size, typename BinaryFunction>
-  struct body
-{
-  RandomAccessIterator1 first;
-  RandomAccessIterator2 result;
-  Size n, interval_size;
-  BinaryFunction binary_op;
-
-  body(RandomAccessIterator1 first, RandomAccessIterator2 result, Size n, Size interval_size, BinaryFunction binary_op)
-    : first(first), result(result), n(n), interval_size(interval_size), binary_op(binary_op)
-  {}
-
-  void operator()(const ::tbb::blocked_range<Size> &r) const
-  {
-    assert(r.size() == 1);
-
-    Size interval_idx = r.begin();
-
-    Size offset_to_first = interval_size * interval_idx;
-    Size offset_to_last = thrust::min(n, offset_to_first + interval_size);
-
-    RandomAccessIterator1 my_first = first + offset_to_first;
-    RandomAccessIterator1 my_last  = first + offset_to_last;
-
-    thrust::cpp::tag seq;
-
-    // carefully pass the init value for the interval with raw_reference_cast
-    typedef typename BinaryFunction::result_type sum_type;
-    result[interval_idx] =
-      thrust::reduce(seq, my_first + 1, my_last, sum_type(thrust::raw_reference_cast(*my_first)), binary_op);
-  }
-};
-
-
-template<typename RandomAccessIterator1, typename RandomAccessIterator2, typename Size, typename BinaryFunction>
-  body<RandomAccessIterator1,RandomAccessIterator2,Size,BinaryFunction>
-    make_body(RandomAccessIterator1 first, RandomAccessIterator2 result, Size n, Size interval_size, BinaryFunction binary_op)
-{
-  return body<RandomAccessIterator1,RandomAccessIterator2,Size,BinaryFunction>(first, result, n, interval_size, binary_op);
-}
-
-
-} // end reduce_intervals_detail
-
-
-template<typename DerivedPolicy, typename RandomAccessIterator1, typename Size, typename RandomAccessIterator2, typename BinaryFunction>
-  void reduce_intervals(thrust::tbb::execution_policy<DerivedPolicy> &,
-                        RandomAccessIterator1 first,
-                        RandomAccessIterator1 last,
-                        Size interval_size,
-                        RandomAccessIterator2 result,
-                        BinaryFunction binary_op)
-{
-  typename thrust::iterator_difference<RandomAccessIterator1>::type n = last - first;
-
-  Size num_intervals = reduce_intervals_detail::divide_ri(n, interval_size);
-
-  ::tbb::parallel_for(::tbb::blocked_range<Size>(0, num_intervals, 1), reduce_intervals_detail::make_body(first, result, Size(n), interval_size, binary_op), ::tbb::simple_partitioner());
-}
-
-
-template<typename DerivedPolicy, typename RandomAccessIterator1, typename Size, typename RandomAccessIterator2>
-  void reduce_intervals(thrust::tbb::execution_policy<DerivedPolicy> &exec,
-                        RandomAccessIterator1 first,
-                        RandomAccessIterator1 last,
-                        Size interval_size,
-                        RandomAccessIterator2 result)
-{
-  typedef typename thrust::iterator_value<RandomAccessIterator1>::type value_type;
-
-  return thrust::system::tbb::detail::reduce_intervals(exec, first, last, interval_size, result, thrust::plus<value_type>());
-}
-
-
-} // end detail
-} // end tbb
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/tbb/detail/remove.h b/compat/thrust/system/tbb/detail/remove.h
deleted file mode 100644
index 48cbb5c322..0000000000
--- a/compat/thrust/system/tbb/detail/remove.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/omp/detail/execution_policy.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace omp
-{
-namespace detail
-{
-
-template<typename ExecutionPolicy,
-         typename ForwardIterator,
-         typename Predicate>
-  ForwardIterator remove_if(execution_policy<ExecutionPolicy> &exec,
-                            ForwardIterator first,
-                            ForwardIterator last,
-                            Predicate pred);
-
-
-template<typename ExecutionPolicy,
-         typename ForwardIterator,
-         typename InputIterator,
-         typename Predicate>
-  ForwardIterator remove_if(execution_policy<ExecutionPolicy> &exec,
-                            ForwardIterator first,
-                            ForwardIterator last,
-                            InputIterator stencil,
-                            Predicate pred);
-
-
-template<typename ExecutionPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename Predicate>
-  OutputIterator remove_copy_if(execution_policy<ExecutionPolicy> &exec,
-                                InputIterator first,
-                                InputIterator last,
-                                OutputIterator result,
-                                Predicate pred);
-
-
-template<typename ExecutionPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename Predicate>
-  OutputIterator remove_copy_if(execution_policy<ExecutionPolicy> &exec,
-                                InputIterator1 first,
-                                InputIterator1 last,
-                                InputIterator2 stencil,
-                                OutputIterator result,
-                                Predicate pred);
-
-
-} // end namespace detail
-} // end namespace omp
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/tbb/detail/remove.inl>
-
diff --git a/compat/thrust/system/tbb/detail/remove.inl b/compat/thrust/system/tbb/detail/remove.inl
deleted file mode 100644
index 01916c52c8..0000000000
--- a/compat/thrust/system/tbb/detail/remove.inl
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/tbb/detail/remove.h>
-#include <thrust/system/detail/generic/remove.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace tbb
-{
-namespace detail
-{
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename Predicate>
-  ForwardIterator remove_if(execution_policy<DerivedPolicy> &exec,
-                            ForwardIterator first,
-                            ForwardIterator last,
-                            Predicate pred)
-{
-  // tbb prefers generic::remove_if to cpp::remove_if
-  return thrust::system::detail::generic::remove_if(exec, first, last, pred);
-}
-
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename InputIterator,
-         typename Predicate>
-  ForwardIterator remove_if(execution_policy<DerivedPolicy> &exec,
-                            ForwardIterator first,
-                            ForwardIterator last,
-                            InputIterator stencil,
-                            Predicate pred)
-{
-  // tbb prefers generic::remove_if to cpp::remove_if
-  return thrust::system::detail::generic::remove_if(exec, first, last, stencil, pred);
-}
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename Predicate>
-  OutputIterator remove_copy_if(execution_policy<DerivedPolicy> &exec,
-                                InputIterator first,
-                                InputIterator last,
-                                OutputIterator result,
-                                Predicate pred)
-{
-  // tbb prefers generic::remove_copy_if to cpp::remove_copy_if
-  return thrust::system::detail::generic::remove_copy_if(exec, first, last, result, pred);
-}
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename Predicate>
-  OutputIterator remove_copy_if(execution_policy<DerivedPolicy> &exec,
-                                InputIterator1 first,
-                                InputIterator1 last,
-                                InputIterator2 stencil,
-                                OutputIterator result,
-                                Predicate pred)
-{
-  // tbb prefers generic::remove_copy_if to cpp::remove_copy_if
-  return thrust::system::detail::generic::remove_copy_if(exec, first, last, stencil, result, pred);
-}
-
-} // end namespace detail
-} // end namespace tbb
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/tbb/detail/replace.h b/compat/thrust/system/tbb/detail/replace.h
deleted file mode 100644
index c48555d0e1..0000000000
--- a/compat/thrust/system/tbb/detail/replace.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system inherits this algorithm
-#include <thrust/system/cpp/detail/scatter.h>
-
diff --git a/compat/thrust/system/tbb/detail/reverse.h b/compat/thrust/system/tbb/detail/reverse.h
deleted file mode 100644
index 04923d1f6a..0000000000
--- a/compat/thrust/system/tbb/detail/reverse.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system inherits reverse
-#include <thrust/system/cpp/detail/reverse.h>
-
diff --git a/compat/thrust/system/tbb/detail/scan.h b/compat/thrust/system/tbb/detail/scan.h
deleted file mode 100644
index ed5cacd7e9..0000000000
--- a/compat/thrust/system/tbb/detail/scan.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file scan.h
- *  \brief TBB implementations of scan functions.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/tbb/detail/execution_policy.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace tbb
-{
-namespace detail
-{
-
-template<typename InputIterator,
-         typename OutputIterator,
-         typename BinaryFunction>
-  OutputIterator inclusive_scan(tag,
-                                InputIterator first,
-                                InputIterator last,
-                                OutputIterator result,
-                                BinaryFunction binary_op);
-
-
-template<typename InputIterator,
-         typename OutputIterator,
-         typename T,
-         typename BinaryFunction>
-  OutputIterator exclusive_scan(tag,
-                                InputIterator first,
-                                InputIterator last,
-                                OutputIterator result,
-                                T init,
-                                BinaryFunction binary_op);
-
-
-} // end namespace detail
-} // end namespace tbb
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/tbb/detail/scan.inl>
-
diff --git a/compat/thrust/system/tbb/detail/scan.inl b/compat/thrust/system/tbb/detail/scan.inl
deleted file mode 100644
index 48878241c9..0000000000
--- a/compat/thrust/system/tbb/detail/scan.inl
+++ /dev/null
@@ -1,293 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/tbb/detail/scan.h>
-#include <thrust/distance.h>
-#include <thrust/advance.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/detail/function.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/detail/type_traits/function_traits.h>
-#include <thrust/detail/type_traits/iterator/is_output_iterator.h>
-#include <tbb/blocked_range.h>
-#include <tbb/parallel_scan.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace tbb
-{
-namespace detail
-{
-namespace scan_detail
-{
-
-template<typename InputIterator,
-         typename OutputIterator,
-         typename BinaryFunction,
-         typename ValueType>
-struct inclusive_body
-{
-  InputIterator input;
-  OutputIterator output;
-  thrust::detail::host_function<BinaryFunction,ValueType> binary_op;
-  ValueType sum;
-  bool first_call;
-
-  inclusive_body(InputIterator input, OutputIterator output, BinaryFunction binary_op, ValueType dummy)
-    : input(input), output(output), binary_op(binary_op), sum(dummy), first_call(true)
-  {}
-    
-  inclusive_body(inclusive_body& b, ::tbb::split)
-    : input(b.input), output(b.output), binary_op(b.binary_op), sum(b.sum), first_call(true)
-  {}
-
-  template<typename Size> 
-  void operator()(const ::tbb::blocked_range<Size>& r, ::tbb::pre_scan_tag)
-  {
-    InputIterator iter = input + r.begin();
- 
-    ValueType temp = *iter;
-
-    ++iter;
-
-    for (Size i = r.begin() + 1; i != r.end(); ++i, ++iter)
-      temp = binary_op(temp, *iter);
-
-    if (first_call)
-      sum = temp;
-    else
-      sum = binary_op(sum, temp);
-      
-    first_call = false;
-  }
-  
-  template<typename Size> 
-  void operator()(const ::tbb::blocked_range<Size>& r, ::tbb::final_scan_tag)
-  {
-    InputIterator  iter1 = input  + r.begin();
-    OutputIterator iter2 = output + r.begin();
-
-    if (first_call)
-    {
-      *iter2 = sum = *iter1;
-      ++iter1;
-      ++iter2;
-      for (Size i = r.begin() + 1; i != r.end(); ++i, ++iter1, ++iter2)
-        *iter2 = sum = binary_op(sum, *iter1);
-    }
-    else
-    {
-      for (Size i = r.begin(); i != r.end(); ++i, ++iter1, ++iter2)
-        *iter2 = sum = binary_op(sum, *iter1);
-    }
-
-    first_call = false;
-  }
-
-  void reverse_join(inclusive_body& b)
-  {
-    sum = binary_op(b.sum, sum);
-  } 
-
-  void assign(inclusive_body& b)
-  {
-    sum = b.sum;
-  } 
-};
-
-
-template<typename InputIterator,
-         typename OutputIterator,
-         typename BinaryFunction,
-         typename ValueType>
-struct exclusive_body
-{
-  InputIterator input;
-  OutputIterator output;
-  thrust::detail::host_function<BinaryFunction,ValueType> binary_op;
-  ValueType sum;
-  bool first_call;
-
-  exclusive_body(InputIterator input, OutputIterator output, BinaryFunction binary_op, ValueType init)
-    : input(input), output(output), binary_op(binary_op), sum(init), first_call(true)
-  {}
-    
-  exclusive_body(exclusive_body& b, ::tbb::split)
-    : input(b.input), output(b.output), binary_op(b.binary_op), sum(b.sum), first_call(true)
-  {}
-
-  template<typename Size> 
-  void operator()(const ::tbb::blocked_range<Size>& r, ::tbb::pre_scan_tag)
-  {
-    InputIterator iter = input + r.begin();
- 
-    ValueType temp = *iter;
-
-    ++iter;
-
-    for (Size i = r.begin() + 1; i != r.end(); ++i, ++iter)
-      temp = binary_op(temp, *iter);
-
-    if (first_call && r.begin() > 0)
-      sum = temp;
-    else
-      sum = binary_op(sum, temp);
-      
-    first_call = false;
-  }
-  
-  template<typename Size> 
-  void operator()(const ::tbb::blocked_range<Size>& r, ::tbb::final_scan_tag)
-  {
-    InputIterator  iter1 = input  + r.begin();
-    OutputIterator iter2 = output + r.begin();
-
-    for (Size i = r.begin(); i != r.end(); ++i, ++iter1, ++iter2)
-    {
-      ValueType temp = binary_op(sum, *iter1);
-      *iter2 = sum;
-      sum = temp;
-    }
-    
-    first_call = false;
-  }
-
-  void reverse_join(exclusive_body& b)
-  {
-    sum = binary_op(b.sum, sum);
-  } 
-
-  void assign(exclusive_body& b)
-  {
-    sum = b.sum;
-  } 
-};
-
-} // end scan_detail
-
-
-
-template<typename InputIterator,
-         typename OutputIterator,
-         typename BinaryFunction>
-  OutputIterator inclusive_scan(tag,
-                                InputIterator first,
-                                InputIterator last,
-                                OutputIterator result,
-                                BinaryFunction binary_op)
-{
-  // the pseudocode for deducing the type of the temporary used below:
-  // 
-  // if BinaryFunction is AdaptableBinaryFunction
-  //   TemporaryType = AdaptableBinaryFunction::result_type
-  // else if OutputIterator is a "pure" output iterator
-  //   TemporaryType = InputIterator::value_type
-  // else
-  //   TemporaryType = OutputIterator::value_type
-  //
-  // XXX upon c++0x, TemporaryType needs to be:
-  // result_of<BinaryFunction>::type
-  
-  using namespace thrust::detail;
-
-  typedef typename eval_if<
-    has_result_type<BinaryFunction>::value,
-    result_type<BinaryFunction>,
-    eval_if<
-      is_output_iterator<OutputIterator>::value,
-      thrust::iterator_value<InputIterator>,
-      thrust::iterator_value<OutputIterator>
-    >
-  >::type ValueType;
-  
-  typedef typename thrust::iterator_difference<InputIterator>::type Size; 
-  
-  Size n = thrust::distance(first, last);
-
-  if (n != 0)
-  {
-    typedef typename scan_detail::inclusive_body<InputIterator,OutputIterator,BinaryFunction,ValueType> Body;
-    Body scan_body(first, result, binary_op, *first);
-    ::tbb::parallel_scan(::tbb::blocked_range<Size>(0,n), scan_body);
-  }
- 
-  thrust::advance(result, n);
-
-  return result;
-}
-
-
-template<typename InputIterator,
-         typename OutputIterator,
-         typename T,
-         typename BinaryFunction>
-  OutputIterator exclusive_scan(tag,
-                                InputIterator first,
-                                InputIterator last,
-                                OutputIterator result,
-                                T init,
-                                BinaryFunction binary_op)
-{
-  // the pseudocode for deducing the type of the temporary used below:
-  // 
-  // if BinaryFunction is AdaptableBinaryFunction
-  //   TemporaryType = AdaptableBinaryFunction::result_type
-  // else if OutputIterator is a "pure" output iterator
-  //   TemporaryType = InputIterator::value_type
-  // else
-  //   TemporaryType = OutputIterator::value_type
-  //
-  // XXX upon c++0x, TemporaryType needs to be:
-  // result_of<BinaryFunction>::type
-
-  using namespace thrust::detail;
-
-  typedef typename eval_if<
-    has_result_type<BinaryFunction>::value,
-    result_type<BinaryFunction>,
-    eval_if<
-      is_output_iterator<OutputIterator>::value,
-      thrust::iterator_value<InputIterator>,
-      thrust::iterator_value<OutputIterator>
-    >
-  >::type ValueType;
-
-  typedef typename thrust::iterator_difference<InputIterator>::type Size; 
-  
-  Size n = thrust::distance(first, last);
-
-  if (n != 0)
-  {
-    typedef typename scan_detail::exclusive_body<InputIterator,OutputIterator,BinaryFunction,ValueType> Body;
-    Body scan_body(first, result, binary_op, init);
-    ::tbb::parallel_scan(::tbb::blocked_range<Size>(0,n), scan_body);
-  }
- 
-  thrust::advance(result, n);
-
-  return result;
-} 
-
-} // end namespace detail
-} // end namespace tbb
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/tbb/detail/scan_by_key.h b/compat/thrust/system/tbb/detail/scan_by_key.h
deleted file mode 100644
index cad4fc1454..0000000000
--- a/compat/thrust/system/tbb/detail/scan_by_key.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system inherits scan_by_key
-#include <thrust/system/cpp/detail/scan_by_key.h>
-
diff --git a/compat/thrust/system/tbb/detail/scatter.h b/compat/thrust/system/tbb/detail/scatter.h
deleted file mode 100644
index c48555d0e1..0000000000
--- a/compat/thrust/system/tbb/detail/scatter.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system inherits this algorithm
-#include <thrust/system/cpp/detail/scatter.h>
-
diff --git a/compat/thrust/system/tbb/detail/sequence.h b/compat/thrust/system/tbb/detail/sequence.h
deleted file mode 100644
index 811d8f5fbb..0000000000
--- a/compat/thrust/system/tbb/detail/sequence.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system inherits sequence
-#include <thrust/system/cpp/detail/sequence.h>
-
diff --git a/compat/thrust/system/tbb/detail/set_operations.h b/compat/thrust/system/tbb/detail/set_operations.h
deleted file mode 100644
index 687edb2e7d..0000000000
--- a/compat/thrust/system/tbb/detail/set_operations.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system inherits set_operations
-#include <thrust/system/cpp/detail/set_operations.h>
-
diff --git a/compat/thrust/system/tbb/detail/sort.h b/compat/thrust/system/tbb/detail/sort.h
deleted file mode 100644
index 3b6f63075e..0000000000
--- a/compat/thrust/system/tbb/detail/sort.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/tbb/detail/execution_policy.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace tbb
-{
-namespace detail
-{
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator,
-         typename StrictWeakOrdering>
-  void stable_sort(execution_policy<DerivedPolicy> &exec,
-                   RandomAccessIterator first,
-                   RandomAccessIterator last,
-                   StrictWeakOrdering comp);
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename StrictWeakOrdering>
-  void stable_sort_by_key(execution_policy<DerivedPolicy> &exec,
-                          RandomAccessIterator1 keys_first,
-                          RandomAccessIterator1 keys_last,
-                          RandomAccessIterator2 values_first,
-                          StrictWeakOrdering comp);
-
-} // end namespace detail
-} // end namespace tbb
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/tbb/detail/sort.inl>
-
diff --git a/compat/thrust/system/tbb/detail/sort.inl b/compat/thrust/system/tbb/detail/sort.inl
deleted file mode 100644
index f292789067..0000000000
--- a/compat/thrust/system/tbb/detail/sort.inl
+++ /dev/null
@@ -1,251 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/temporary_array.h>
-#include <thrust/detail/copy.h>
-#include <thrust/system/detail/internal/scalar/sort.h>
-#include <thrust/iterator/iterator_traits.h>
-#include <thrust/distance.h>
-#include <thrust/merge.h>
-#include <tbb/parallel_invoke.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace tbb
-{
-namespace detail
-{
-namespace sort_detail
-{
-
-// TODO tune this based on data type and comp
-const static int threshold = 128 * 1024;
-  
-template <typename DerivedPolicy, typename Iterator1, typename Iterator2, typename StrictWeakOrdering>
-void merge_sort(execution_policy<DerivedPolicy> &exec, Iterator1 first1, Iterator1 last1, Iterator2 first2, StrictWeakOrdering comp, bool inplace);
-
-template <typename DerivedPolicy, typename Iterator1, typename Iterator2, typename StrictWeakOrdering>
-struct merge_sort_closure
-{
-  execution_policy<DerivedPolicy> &exec;
-  Iterator1 first1, last1;
-  Iterator2 first2;
-  StrictWeakOrdering comp;
-  bool inplace;
-
-  merge_sort_closure(execution_policy<DerivedPolicy> &exec, Iterator1 first1, Iterator1 last1, Iterator2 first2, StrictWeakOrdering comp, bool inplace)
-    : exec(exec), first1(first1), last1(last1), first2(first2), comp(comp), inplace(inplace)
-  {}
-
-  void operator()(void) const
-  {
-    merge_sort(exec, first1, last1, first2, comp, inplace);
-  }
-};
-
-
-template <typename DerivedPolicy, typename Iterator1, typename Iterator2, typename StrictWeakOrdering>
-void merge_sort(execution_policy<DerivedPolicy> &exec, Iterator1 first1, Iterator1 last1, Iterator2 first2, StrictWeakOrdering comp, bool inplace)
-{
-  typedef typename thrust::iterator_difference<Iterator1>::type difference_type;
-
-  difference_type n = thrust::distance(first1, last1);
-
-  if (n < threshold)
-  {
-    thrust::system::detail::internal::scalar::stable_sort(first1, last1, comp);
-    
-    if (!inplace)
-      thrust::system::detail::internal::scalar::copy(first1, last1, first2);
-
-    return;
-  }
-
-  Iterator1 mid1  = first1 + (n / 2);
-  Iterator2 mid2  = first2 + (n / 2);
-  Iterator2 last2 = first2 + n;
-
-  typedef merge_sort_closure<DerivedPolicy,Iterator1,Iterator2,StrictWeakOrdering> Closure;
-  
-  Closure left (exec, first1, mid1,  first2, comp, !inplace);
-  Closure right(exec, mid1,   last1, mid2,   comp, !inplace);
-
-  ::tbb::parallel_invoke(left, right);
-
-  if (inplace) thrust::merge(exec, first2, mid2, mid2, last2, first1, comp);
-  else	       thrust::merge(exec, first1, mid1, mid1, last1, first2, comp);
-}
-
-} // end namespace sort_detail
-
-
-namespace sort_by_key_detail
-{
-
-// TODO tune this based on data type and comp
-const static int threshold = 128 * 1024;
-  
-template <typename DerivedPolicy,
-          typename Iterator1,
-          typename Iterator2,
-          typename Iterator3,
-          typename Iterator4,
-          typename StrictWeakOrdering>
-void merge_sort_by_key(execution_policy<DerivedPolicy> &exec,
-                       Iterator1 first1,
-                       Iterator1 last1,
-                       Iterator2 first2,
-                       Iterator3 first3,
-                       Iterator4 first4,
-                       StrictWeakOrdering comp,
-                       bool inplace);
-
-template <typename DerivedPolicy,
-          typename Iterator1,
-          typename Iterator2,
-          typename Iterator3,
-          typename Iterator4,
-          typename StrictWeakOrdering>
-struct merge_sort_by_key_closure
-{
-  execution_policy<DerivedPolicy> &exec;
-  Iterator1 first1, last1;
-  Iterator2 first2;
-  Iterator3 first3;
-  Iterator4 first4;
-  StrictWeakOrdering comp;
-  bool inplace;
-
-  merge_sort_by_key_closure(execution_policy<DerivedPolicy> &exec,
-                            Iterator1 first1,
-                            Iterator1 last1,
-                            Iterator2 first2,
-                            Iterator3 first3,
-                            Iterator4 first4,
-                            StrictWeakOrdering comp,
-                            bool inplace)
-    : exec(exec), first1(first1), last1(last1), first2(first2), first3(first3), first4(first4), comp(comp), inplace(inplace)
-  {}
-
-  void operator()(void) const
-  {
-    merge_sort_by_key(exec, first1, last1, first2, first3, first4, comp, inplace);
-  }
-};
-
-
-template <typename DerivedPolicy,
-          typename Iterator1,
-          typename Iterator2,
-          typename Iterator3,
-          typename Iterator4,
-          typename StrictWeakOrdering>
-void merge_sort_by_key(execution_policy<DerivedPolicy> &exec,
-                       Iterator1 first1,
-                       Iterator1 last1,
-                       Iterator2 first2,
-                       Iterator3 first3,
-                       Iterator4 first4,
-                       StrictWeakOrdering comp,
-                       bool inplace)
-{
-  typedef typename thrust::iterator_difference<Iterator1>::type difference_type;
-
-  difference_type n = thrust::distance(first1, last1);
-  
-  Iterator1 mid1  = first1 + (n / 2);
-  Iterator2 mid2  = first2 + (n / 2);
-  Iterator3 mid3  = first3 + (n / 2);
-  Iterator4 mid4  = first4 + (n / 2);
-  Iterator2 last2 = first2 + n;
-  Iterator3 last3 = first3 + n;
-
-  if (n < threshold)
-  {
-    thrust::system::detail::internal::scalar::stable_sort_by_key(first1, last1, first2, comp);
-    
-    if (!inplace)
-    {
-      thrust::system::detail::internal::scalar::copy(first1, last1, first3);
-      thrust::system::detail::internal::scalar::copy(first2, last2, first4);
-    }
-
-    return;
-  }
-
-  typedef merge_sort_by_key_closure<DerivedPolicy,Iterator1,Iterator2,Iterator3,Iterator4,StrictWeakOrdering> Closure;
-  
-  Closure left (exec, first1, mid1,  first2, first3, first4, comp, !inplace);
-  Closure right(exec, mid1,   last1, mid2,   mid3,   mid4,   comp, !inplace);
-
-  ::tbb::parallel_invoke(left, right);
-
-  if(inplace)
-  {
-    thrust::merge_by_key(exec, first3, mid3, mid3, last3, first4, mid4, first1, first2, comp);
-  }
-  else
-  {
-    thrust::merge_by_key(exec, first1, mid1, mid1, last1, first2, mid2, first3, first4, comp);
-  }
-}
-
-} // end namespace sort_detail
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator,
-         typename StrictWeakOrdering>
-void stable_sort(execution_policy<DerivedPolicy> &exec,
-                 RandomAccessIterator first,
-                 RandomAccessIterator last,
-                 StrictWeakOrdering comp)
-{
-  typedef typename thrust::iterator_value<RandomAccessIterator>::type key_type;
-
-  thrust::detail::temporary_array<key_type, DerivedPolicy> temp(exec, first, last);
-
-  sort_detail::merge_sort(exec, first, last, temp.begin(), comp, true);
-}
-
-template<typename DerivedPolicy,
-         typename RandomAccessIterator1,
-         typename RandomAccessIterator2,
-         typename StrictWeakOrdering>
-  void stable_sort_by_key(execution_policy<DerivedPolicy> &exec,
-                          RandomAccessIterator1 first1,
-                          RandomAccessIterator1 last1,
-                          RandomAccessIterator2 first2,
-                          StrictWeakOrdering comp)
-{
-  typedef typename thrust::iterator_value<RandomAccessIterator1>::type key_type;
-  typedef typename thrust::iterator_value<RandomAccessIterator2>::type val_type;
-
-  RandomAccessIterator2 last2 = first2 + thrust::distance(first1, last1);
-
-  thrust::detail::temporary_array<key_type, DerivedPolicy> temp1(exec, first1, last1);
-  thrust::detail::temporary_array<val_type, DerivedPolicy> temp2(exec, first2, last2);
-
-  sort_by_key_detail::merge_sort_by_key(exec, first1, last1, first2, temp1.begin(), temp2.begin(), comp, true);
-}
-
-} // end namespace detail
-} // end namespace tbb
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/tbb/detail/swap_ranges.h b/compat/thrust/system/tbb/detail/swap_ranges.h
deleted file mode 100644
index 15f8f55310..0000000000
--- a/compat/thrust/system/tbb/detail/swap_ranges.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// tbb inherits swap_ranges
-#include <thrust/system/cpp/detail/swap_ranges.h>
-
diff --git a/compat/thrust/system/tbb/detail/tabulate.h b/compat/thrust/system/tbb/detail/tabulate.h
deleted file mode 100644
index da65d8e44d..0000000000
--- a/compat/thrust/system/tbb/detail/tabulate.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system inherits tabulate
-#include <thrust/system/cpp/detail/tabulate.h>
-
diff --git a/compat/thrust/system/tbb/detail/temporary_buffer.h b/compat/thrust/system/tbb/detail/temporary_buffer.h
deleted file mode 100644
index 628bd75719..0000000000
--- a/compat/thrust/system/tbb/detail/temporary_buffer.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system has no special temporary buffer functions
-
diff --git a/compat/thrust/system/tbb/detail/transform.h b/compat/thrust/system/tbb/detail/transform.h
deleted file mode 100644
index 70ce1f41b6..0000000000
--- a/compat/thrust/system/tbb/detail/transform.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// omp inherits transform
-#include <thrust/system/cpp/detail/transform.h>
-
diff --git a/compat/thrust/system/tbb/detail/transform_reduce.h b/compat/thrust/system/tbb/detail/transform_reduce.h
deleted file mode 100644
index 23ed07054a..0000000000
--- a/compat/thrust/system/tbb/detail/transform_reduce.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system inherits transform_reduce
-#include <thrust/system/cpp/detail/transform_reduce.h>
-
diff --git a/compat/thrust/system/tbb/detail/transform_scan.h b/compat/thrust/system/tbb/detail/transform_scan.h
deleted file mode 100644
index fc2e55d0c0..0000000000
--- a/compat/thrust/system/tbb/detail/transform_scan.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system inherits transform_scan
-#include <thrust/system/cpp/detail/transform_scan.h>
-
diff --git a/compat/thrust/system/tbb/detail/uninitialized_copy.h b/compat/thrust/system/tbb/detail/uninitialized_copy.h
deleted file mode 100644
index 944f4baf0e..0000000000
--- a/compat/thrust/system/tbb/detail/uninitialized_copy.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system inherits uninitialized_copy
-#include <thrust/system/cpp/detail/uninitialized_copy.h>
-
diff --git a/compat/thrust/system/tbb/detail/uninitialized_fill.h b/compat/thrust/system/tbb/detail/uninitialized_fill.h
deleted file mode 100644
index b9d6de20fa..0000000000
--- a/compat/thrust/system/tbb/detail/uninitialized_fill.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-// this system inherits uninitialized_fill
-#include <thrust/system/cpp/detail/uninitialized_fill.h>
-
diff --git a/compat/thrust/system/tbb/detail/unique.h b/compat/thrust/system/tbb/detail/unique.h
deleted file mode 100644
index 34538cac7b..0000000000
--- a/compat/thrust/system/tbb/detail/unique.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in ctbbliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/tbb/detail/execution_policy.h>
-#include <thrust/pair.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace tbb
-{
-namespace detail
-{
-
-
-template<typename ExecutionPolicy,
-         typename ForwardIterator,
-         typename BinaryPredicate>
-  ForwardIterator unique(execution_policy<ExecutionPolicy> &exec,
-                         ForwardIterator first,
-                         ForwardIterator last,
-                         BinaryPredicate binary_pred);
-
-
-template<typename ExecutionPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename BinaryPredicate>
-  OutputIterator unique_copy(execution_policy<ExecutionPolicy> &exec,
-                             InputIterator first,
-                             InputIterator last,
-                             OutputIterator output,
-                             BinaryPredicate binary_pred);
-
-
-} // end namespace detail
-} // end namespace tbb 
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/tbb/detail/unique.inl>
-
diff --git a/compat/thrust/system/tbb/detail/unique.inl b/compat/thrust/system/tbb/detail/unique.inl
deleted file mode 100644
index 06e6a30bb1..0000000000
--- a/compat/thrust/system/tbb/detail/unique.inl
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in ctbbliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/tbb/detail/unique.h>
-#include <thrust/system/detail/generic/unique.h>
-#include <thrust/pair.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace tbb
-{
-namespace detail
-{
-
-
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename BinaryPredicate>
-  ForwardIterator unique(execution_policy<DerivedPolicy> &exec,
-                         ForwardIterator first,
-                         ForwardIterator last,
-                         BinaryPredicate binary_pred)
-{
-  // tbb prefers generic::unique to cpp::unique
-  return thrust::system::detail::generic::unique(exec,first,last,binary_pred);
-} // end unique()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename BinaryPredicate>
-  OutputIterator unique_copy(execution_policy<DerivedPolicy> &exec,
-                             InputIterator first,
-                             InputIterator last,
-                             OutputIterator output,
-                             BinaryPredicate binary_pred)
-{
-  // tbb prefers generic::unique_copy to cpp::unique_copy
-  return thrust::system::detail::generic::unique_copy(exec,first,last,output,binary_pred);
-} // end unique_copy()
-
-
-} // end namespace detail
-} // end namespace tbb 
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/tbb/detail/unique_by_key.h b/compat/thrust/system/tbb/detail/unique_by_key.h
deleted file mode 100644
index c6d053243f..0000000000
--- a/compat/thrust/system/tbb/detail/unique_by_key.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in ctbbliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/tbb/detail/execution_policy.h>
-#include <thrust/pair.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace tbb
-{
-namespace detail
-{
-
-
-template<typename DerivedPolicy,
-         typename ForwardIterator1,
-         typename ForwardIterator2,
-         typename BinaryPredicate>
-  thrust::pair<ForwardIterator1,ForwardIterator2>
-    unique_by_key(execution_policy<DerivedPolicy> &exec,
-                  ForwardIterator1 keys_first, 
-                  ForwardIterator1 keys_last,
-                  ForwardIterator2 values_first,
-                  BinaryPredicate binary_pred);
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename BinaryPredicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    unique_by_key_copy(execution_policy<DerivedPolicy> &exec,
-                       InputIterator1 keys_first, 
-                       InputIterator1 keys_last,
-                       InputIterator2 values_first,
-                       OutputIterator1 keys_output,
-                       OutputIterator2 values_output,
-                       BinaryPredicate binary_pred);
-
-
-} // end namespace detail
-} // end namespace tbb 
-} // end namespace system
-} // end namespace thrust
-
-#include <thrust/system/tbb/detail/unique_by_key.inl>
-
diff --git a/compat/thrust/system/tbb/detail/unique_by_key.inl b/compat/thrust/system/tbb/detail/unique_by_key.inl
deleted file mode 100644
index 7747ca4c2e..0000000000
--- a/compat/thrust/system/tbb/detail/unique_by_key.inl
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in ctbbliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/tbb/detail/unique_by_key.h>
-#include <thrust/system/detail/generic/unique_by_key.h>
-#include <thrust/pair.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace tbb
-{
-namespace detail
-{
-
-
-template<typename DerivedPolicy,
-         typename ForwardIterator1,
-         typename ForwardIterator2,
-         typename BinaryPredicate>
-  thrust::pair<ForwardIterator1,ForwardIterator2>
-    unique_by_key(execution_policy<DerivedPolicy> &exec,
-                  ForwardIterator1 keys_first, 
-                  ForwardIterator1 keys_last,
-                  ForwardIterator2 values_first,
-                  BinaryPredicate binary_pred)
-{
-  // tbb prefers generic::unique_by_key to cpp::unique_by_key
-  return thrust::system::detail::generic::unique_by_key(exec,keys_first,keys_last,values_first,binary_pred);
-} // end unique_by_key()
-
-
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename BinaryPredicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    unique_by_key_copy(execution_policy<DerivedPolicy> &exec,
-                       InputIterator1 keys_first, 
-                       InputIterator1 keys_last,
-                       InputIterator2 values_first,
-                       OutputIterator1 keys_output,
-                       OutputIterator2 values_output,
-                       BinaryPredicate binary_pred)
-{
-  // tbb prefers generic::unique_by_key_copy to cpp::unique_by_key_copy
-  return thrust::system::detail::generic::unique_by_key_copy(exec,keys_first,keys_last,values_first,keys_output,values_output,binary_pred);
-} // end unique_by_key_copy()
-
-
-} // end namespace detail
-} // end namespace tbb
-} // end namespace system
-} // end namespace thrust
-
diff --git a/compat/thrust/system/tbb/detail/vector.inl b/compat/thrust/system/tbb/detail/vector.inl
deleted file mode 100644
index d87e670fd6..0000000000
--- a/compat/thrust/system/tbb/detail/vector.inl
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in ctbbliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/tbb/vector.h>
-
-namespace thrust
-{
-namespace system
-{
-namespace tbb
-{
-
-template<typename T, typename Allocator>
-  vector<T,Allocator>
-    ::vector()
-      : super_t()
-{}
-
-template<typename T, typename Allocator>
-  vector<T,Allocator>
-    ::vector(size_type n)
-      : super_t(n)
-{}
-
-template<typename T, typename Allocator>
-  vector<T,Allocator>
-    ::vector(size_type n, const value_type &value)
-      : super_t(n,value)
-{}
-
-template<typename T, typename Allocator>
-  vector<T,Allocator>
-    ::vector(const vector &x)
-      : super_t(x)
-{}
-
-template<typename T, typename Allocator>
-  template<typename OtherT, typename OtherAllocator>
-    vector<T,Allocator>
-      ::vector(const thrust::detail::vector_base<OtherT,OtherAllocator> &x)
-        : super_t(x)
-{}
-
-template<typename T, typename Allocator>
-  template<typename OtherT, typename OtherAllocator>
-    vector<T,Allocator>
-      ::vector(const std::vector<OtherT,OtherAllocator> &x)
-        : super_t(x)
-{}
-
-template<typename T, typename Allocator>
-  template<typename InputIterator>
-    vector<T,Allocator>
-      ::vector(InputIterator first, InputIterator last)
-        : super_t(first,last)
-{}
-
-template<typename T, typename Allocator>
-  template<typename OtherT, typename OtherAllocator>
-    vector<T,Allocator> &
-      vector<T,Allocator>
-        ::operator=(const std::vector<OtherT,OtherAllocator> &x)
-{
-  super_t::operator=(x);
-  return *this;
-}
-
-template<typename T, typename Allocator>
-  template<typename OtherT, typename OtherAllocator>
-    vector<T,Allocator> &
-      vector<T,Allocator>
-        ::operator=(const thrust::detail::vector_base<OtherT,OtherAllocator> &x)
-{
-  super_t::operator=(x);
-  return *this;
-}
-      
-} // end tbb
-} // end system
-} // end thrust
-
diff --git a/compat/thrust/system/tbb/execution_policy.h b/compat/thrust/system/tbb/execution_policy.h
deleted file mode 100644
index c462586f82..0000000000
--- a/compat/thrust/system/tbb/execution_policy.h
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-#pragma once
-
-/*! \file thrust/system/tbb/execution_policy.h
- *  \brief Execution policies for Thrust's TBB system.
- */
-
-#include <thrust/detail/config.h>
-
-// get the execution policies definitions first
-#include <thrust/system/tbb/detail/execution_policy.h>
-
-// get the definition of par
-#include <thrust/system/tbb/detail/par.h>
-
-// now get all the algorithm definitions
-
-#include <thrust/system/tbb/detail/adjacent_difference.h>
-#include <thrust/system/tbb/detail/assign_value.h>
-#include <thrust/system/tbb/detail/binary_search.h>
-#include <thrust/system/tbb/detail/copy.h>
-#include <thrust/system/tbb/detail/copy_if.h>
-#include <thrust/system/tbb/detail/count.h>
-#include <thrust/system/tbb/detail/equal.h>
-#include <thrust/system/tbb/detail/extrema.h>
-#include <thrust/system/tbb/detail/fill.h>
-#include <thrust/system/tbb/detail/find.h>
-#include <thrust/system/tbb/detail/for_each.h>
-#include <thrust/system/tbb/detail/gather.h>
-#include <thrust/system/tbb/detail/generate.h>
-#include <thrust/system/tbb/detail/get_value.h>
-#include <thrust/system/tbb/detail/inner_product.h>
-#include <thrust/system/tbb/detail/iter_swap.h>
-#include <thrust/system/tbb/detail/logical.h>
-#include <thrust/system/tbb/detail/malloc_and_free.h>
-#include <thrust/system/tbb/detail/merge.h>
-#include <thrust/system/tbb/detail/mismatch.h>
-#include <thrust/system/tbb/detail/partition.h>
-#include <thrust/system/tbb/detail/reduce.h>
-#include <thrust/system/tbb/detail/reduce_by_key.h>
-#include <thrust/system/tbb/detail/remove.h>
-#include <thrust/system/tbb/detail/replace.h>
-#include <thrust/system/tbb/detail/reverse.h>
-#include <thrust/system/tbb/detail/scan.h>
-#include <thrust/system/tbb/detail/scan_by_key.h>
-#include <thrust/system/tbb/detail/scatter.h>
-#include <thrust/system/tbb/detail/sequence.h>
-#include <thrust/system/tbb/detail/set_operations.h>
-#include <thrust/system/tbb/detail/sort.h>
-#include <thrust/system/tbb/detail/swap_ranges.h>
-#include <thrust/system/tbb/detail/tabulate.h>
-#include <thrust/system/tbb/detail/transform.h>
-#include <thrust/system/tbb/detail/transform_reduce.h>
-#include <thrust/system/tbb/detail/transform_scan.h>
-#include <thrust/system/tbb/detail/uninitialized_copy.h>
-#include <thrust/system/tbb/detail/uninitialized_fill.h>
-#include <thrust/system/tbb/detail/unique.h>
-#include <thrust/system/tbb/detail/unique_by_key.h>
-
-
-// define these entities here for the purpose of Doxygenating them
-// they are actually defined elsewhere
-#if 0
-namespace thrust
-{
-namespace system
-{
-namespace tbb
-{
-
-
-/*! \addtogroup execution_policies
- *  \{
- */
-
-
-/*! \p thrust::tbb::execution_policy is the base class for all Thrust parallel execution
- *  policies which are derived from Thrust's TBB backend system.
- */
-template<typename DerivedPolicy>
-struct execution_policy : thrust::execution_policy<DerivedPolicy>
-{};
-
-
-/*! \p tbb::tag is a type representing Thrust's TBB backend system in C++'s type system.
- *  Iterators "tagged" with a type which is convertible to \p tbb::tag assert that they may be
- *  "dispatched" to algorithm implementations in the \p tbb system.
- */
-struct tag : thrust::system::tbb::execution_policy<tag> { unspecified };
-
-
-/*! \p thrust::tbb::par is the parallel execution policy associated with Thrust's TBB
- *  backend system.
- *
- *  Instead of relying on implicit algorithm dispatch through iterator system tags, users may
- *  directly target Thrust's TBB backend system by providing \p thrust::tbb::par as an algorithm
- *  parameter.
- *
- *  Explicit dispatch can be useful in avoiding the introduction of data copies into containers such
- *  as \p thrust::tbb::vector.
- *
- *  The type of \p thrust::tbb::par is implementation-defined.
- *
- *  The following code snippet demonstrates how to use \p thrust::tbb::par to explicitly dispatch an
- *  invocation of \p thrust::for_each to the TBB backend system:
- *
- *  \code
- *  #include <thrust/for_each.h>
- *  #include <thrust/system/tbb/execution_policy.h>
- *  #include <cstdio>
- *
- *  struct printf_functor
- *  {
- *    __host__ __device__
- *    void operator()(int x)
- *    {
- *      printf("%d\n");
- *    }
- *  };
- *  ...
- *  int vec[3];
- *  vec[0] = 0; vec[1] = 1; vec[2] = 2;
- *
- *  thrust::for_each(thrust::tbb::par, vec.begin(), vec.end(), printf_functor());
- *
- *  // 0 1 2 is printed to standard output in some unspecified order
- *  \endcode
- */
-static const unspecified par;
-
-
-/*! \}
- */
-
-
-} // end tbb
-} // end system
-} // end thrust
-#endif
-
-
diff --git a/compat/thrust/system/tbb/memory.h b/compat/thrust/system/tbb/memory.h
deleted file mode 100644
index deea7eed7d..0000000000
--- a/compat/thrust/system/tbb/memory.h
+++ /dev/null
@@ -1,414 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in ctbbliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-/*! \file thrust/system/tbb/memory.h
- *  \brief Managing memory associated with Thrust's TBB system.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/tbb/detail/execution_policy.h>
-#include <thrust/memory.h>
-#include <thrust/detail/type_traits.h>
-#include <thrust/detail/allocator/malloc_allocator.h>
-#include <ostream>
-
-namespace thrust
-{
-namespace system
-{
-namespace tbb
-{
-
-template<typename> class pointer;
-
-} // end tbb
-} // end system
-} // end thrust
-
-
-/*! \cond
- */
-
-// specialize std::iterator_traits to avoid problems with the name of
-// pointer's constructor shadowing its nested pointer type
-// do this before pointer is defined so the specialization is correctly
-// used inside the definition
-namespace std
-{
-
-template<typename Element>
-  struct iterator_traits<thrust::system::tbb::pointer<Element> >
-{
-  private:
-    typedef thrust::system::tbb::pointer<Element> ptr;
-
-  public:
-    typedef typename ptr::iterator_category       iterator_category;
-    typedef typename ptr::value_type              value_type;
-    typedef typename ptr::difference_type         difference_type;
-    typedef ptr                                   pointer;
-    typedef typename ptr::reference               reference;
-}; // end iterator_traits
-
-} // end std
-
-/*! \endcond
- */
-
-
-namespace thrust
-{
-namespace system
-{
-
-/*! \addtogroup system_backends Systems
- *  \ingroup system
- *  \{
- */
-
-/*! \namespace thrust::system::tbb
- *  \brief \p thrust::system::tbb is the namespace containing functionality for allocating, manipulating,
- *         and deallocating memory available to Thrust's TBB backend system.
- *         The identifiers are provided in a separate namespace underneath <tt>thrust::system</tt>
- *         for import convenience but are also aliased in the top-level <tt>thrust::tbb</tt>
- *         namespace for easy access.
- *
- */
-namespace tbb
-{
-
-// forward declaration of reference for pointer
-template<typename Element> class reference;
-
-/*! \cond
- */
-
-// XXX nvcc + msvc have trouble instantiating reference below
-//     this is a workaround
-namespace detail
-{
-
-template<typename Element>
-  struct reference_msvc_workaround
-{
-  typedef thrust::system::tbb::reference<Element> type;
-}; // end reference_msvc_workaround
-
-} // end detail
-
-/*! \endcond
- */
-
-
-/*! \p pointer stores a pointer to an object allocated in memory available to the tbb system.
- *  This type provides type safety when dispatching standard algorithms on ranges resident
- *  in tbb memory.
- *
- *  \p pointer has pointer semantics: it may be dereferenced and manipulated with pointer arithmetic.
- *
- *  \p pointer can be created with the function \p tbb::malloc, or by explicitly calling its constructor
- *  with a raw pointer.
- *
- *  The raw pointer encapsulated by a \p pointer may be obtained by eiter its <tt>get</tt> member function
- *  or the \p raw_pointer_cast function.
- *
- *  \note \p pointer is not a "smart" pointer; it is the programmer's responsibility to deallocate memory
- *  pointed to by \p pointer.
- *
- *  \tparam T specifies the type of the pointee.
- *
- *  \see tbb::malloc
- *  \see tbb::free
- *  \see raw_pointer_cast
- */
-template<typename T>
-  class pointer
-    : public thrust::pointer<
-               T,
-               thrust::system::tbb::tag,
-               thrust::system::tbb::reference<T>,
-               thrust::system::tbb::pointer<T>
-             >
-{
-  /*! \cond
-   */
-
-  private:
-    typedef thrust::pointer<
-      T,
-      thrust::system::tbb::tag,
-      //thrust::system::tbb::reference<T>,
-      typename detail::reference_msvc_workaround<T>::type,
-      thrust::system::tbb::pointer<T>
-    > super_t;
-
-  /*! \endcond
-   */
-
-  public:
-    // note that tbb::pointer's member functions need __host__ __device__
-    // to interoperate with nvcc + iterators' dereference member function
-
-    /*! \p pointer's no-argument constructor initializes its encapsulated pointer to \c 0.
-     */
-    __host__ __device__
-    pointer() : super_t() {}
-
-    /*! This constructor allows construction of a <tt>pointer<const T></tt> from a <tt>T*</tt>.
-     *
-     *  \param ptr A raw pointer to copy from, presumed to point to a location in memory
-     *         accessible by the \p tbb system.
-     *  \tparam OtherT \p OtherT shall be convertible to \p T.
-     */
-    template<typename OtherT>
-    __host__ __device__
-    explicit pointer(OtherT *ptr) : super_t(ptr) {}
-
-    /*! This constructor allows construction from another pointer-like object with related type.
-     *
-     *  \param other The \p OtherPointer to copy.
-     *  \tparam OtherPointer The system tag associated with \p OtherPointer shall be convertible
-     *          to \p thrust::system::tbb::tag and its element type shall be convertible to \p T.
-     */
-    template<typename OtherPointer>
-    __host__ __device__
-    pointer(const OtherPointer &other,
-            typename thrust::detail::enable_if_pointer_is_convertible<
-              OtherPointer,
-              pointer
-            >::type * = 0) : super_t(other) {}
-
-    /*! Assignment operator allows assigning from another pointer-like object with related type.
-     *
-     *  \param other The other pointer-like object to assign from.
-     *  \tparam OtherPointer The system tag associated with \p OtherPointer shall be convertible
-     *          to \p thrust::system::tbb::tag and its element type shall be convertible to \p T.
-     */
-    template<typename OtherPointer>
-    __host__ __device__
-    typename thrust::detail::enable_if_pointer_is_convertible<
-      OtherPointer,
-      pointer,
-      pointer &
-    >::type
-    operator=(const OtherPointer &other)
-    {
-      return super_t::operator=(other);
-    }
-}; // end pointer
-
-
-/*! \p reference is a wrapped reference to an object stored in memory available to the \p tbb system.
- *  \p reference is the type of the result of dereferencing a \p tbb::pointer.
- *
- *  \tparam T Specifies the type of the referenced object.
- */
-template<typename T>
-  class reference
-    : public thrust::reference<
-               T,
-               thrust::system::tbb::pointer<T>,
-               thrust::system::tbb::reference<T>
-             >
-{
-  /*! \cond
-   */
-
-  private:
-    typedef thrust::reference<
-      T,
-      thrust::system::tbb::pointer<T>,
-      thrust::system::tbb::reference<T>
-    > super_t;
-
-  /*! \endcond
-   */
-
-  public:
-    /*! \cond
-     */
-
-    typedef typename super_t::value_type value_type;
-    typedef typename super_t::pointer    pointer;
-
-    /*! \endcond
-     */
-
-    /*! This constructor initializes this \p reference to refer to an object
-     *  pointed to by the given \p pointer. After this \p reference is constructed,
-     *  it shall refer to the object pointed to by \p ptr.
-     *
-     *  \param ptr A \p pointer to copy from.
-     */
-    __host__ __device__
-    explicit reference(const pointer &ptr)
-      : super_t(ptr)
-    {}
-
-    /*! This constructor accepts a const reference to another \p reference of related type.
-     *  After this \p reference is constructed, it shall refer to the same object as \p other.
-     *
-     *  \param other A \p reference to copy from.
-     *  \tparam OtherT The element type of the other \p reference.
-     *
-     *  \note This constructor is templated primarily to allow initialization of <tt>reference<const T></tt>
-     *        from <tt>reference<T></tt>.
-     */
-    template<typename OtherT>
-    __host__ __device__
-    reference(const reference<OtherT> &other,
-              typename thrust::detail::enable_if_convertible<
-                typename reference<OtherT>::pointer,
-                pointer
-              >::type * = 0)
-      : super_t(other)
-    {}
-
-    /*! Copy assignment operator copy assigns from another \p reference of related type.
-     *
-     *  \param other The other \p reference to assign from.
-     *  \return <tt>*this</tt>
-     *  \tparam OtherT The element type of the other \p reference.
-     */
-    template<typename OtherT>
-    reference &operator=(const reference<OtherT> &other);
-
-    /*! Assignment operator assigns from a \p value_type.
-     *
-     *  \param x The \p value_type to assign from.
-     *  \return <tt>*this</tt>
-     */
-    reference &operator=(const value_type &x);
-}; // end reference
-
-/*! Exchanges the values of two objects referred to by \p reference.
- *  \p x The first \p reference of interest.
- *  \p y The second \p reference ot interest.
- */
-template<typename T>
-__host__ __device__
-void swap(reference<T> x, reference<T> y);
-
-/*! Allocates an area of memory available to Thrust's <tt>tbb</tt> system.
- *  \param n Number of bytes to allocate.
- *  \return A <tt>tbb::pointer<void></tt> pointing to the beginning of the newly
- *          allocated memory. A null <tt>tbb::pointer<void></tt> is returned if
- *          an error occurs.
- *  \note The <tt>tbb::pointer<void></tt> returned by this function must be
- *        deallocated with \p tbb::free.
- *  \see tbb::free
- *  \see std::malloc
- */
-inline pointer<void> malloc(std::size_t n);
-
-/*! Allocates a typed area of memory available to Thrust's <tt>tbb</tt> system.
- *  \param n Number of elements to allocate.
- *  \return A <tt>tbb::pointer<T></tt> pointing to the beginning of the newly
- *          allocated memory. A null <tt>tbb::pointer<T></tt> is returned if
- *          an error occurs.
- *  \note The <tt>tbb::pointer<T></tt> returned by this function must be
- *        deallocated with \p tbb::free.
- *  \see tbb::free
- *  \see std::malloc
- */
-template<typename T>
-inline pointer<T> malloc(std::size_t n);
-
-/*! Deallocates an area of memory previously allocated by <tt>tbb::malloc</tt>.
- *  \param ptr A <tt>tbb::pointer<void></tt> pointing to the beginning of an area
- *         of memory previously allocated with <tt>tbb::malloc</tt>.
- *  \see tbb::malloc
- *  \see std::free
- */
-inline void free(pointer<void> ptr);
-
-// XXX upon c++11
-// template<typename T> using allocator = thrust::detail::malloc_allocator<T,tag,pointer<T> >;
-
-/*! \p tbb::allocator is the default allocator used by the \p tbb system's containers such as
- *  <tt>tbb::vector</tt> if no user-specified allocator is provided. \p tbb::allocator allocates
- *  (deallocates) storage with \p tbb::malloc (\p tbb::free).
- */
-template<typename T>
-  struct allocator
-    : thrust::detail::malloc_allocator<
-        T,
-        tag,
-        pointer<T>
-      >
-{
-  /*! The \p rebind metafunction provides the type of an \p allocator
-   *  instantiated with another type.
-   *
-   *  \tparam U The other type to use for instantiation.
-   */
-  template<typename U>
-    struct rebind
-  {
-    /*! The typedef \p other gives the type of the rebound \p allocator.
-     */
-    typedef allocator<U> other;
-  };
-
-  /*! No-argument constructor has no effect.
-   */
-  __host__ __device__
-  inline allocator() {}
-
-  /*! Copy constructor has no effect.
-   */
-  __host__ __device__
-  inline allocator(const allocator &) {}
-
-  /*! Constructor from other \p allocator has no effect.
-   */
-  template<typename U>
-  __host__ __device__
-  inline allocator(const allocator<U> &) {}
-
-  /*! Destructor has no effect.
-   */
-  __host__ __device__
-  inline ~allocator() {}
-}; // end allocator
-
-} // end tbb
-
-/*! \}
- */
-
-} // end system
-
-/*! \namespace thrust::tbb
- *  \brief \p thrust::tbb is a top-level alias for thrust::system::tbb.
- */
-namespace tbb
-{
-
-using thrust::system::tbb::pointer;
-using thrust::system::tbb::reference;
-using thrust::system::tbb::malloc;
-using thrust::system::tbb::free;
-using thrust::system::tbb::allocator;
-
-} // end tbb
-
-} // end thrust
-
-#include <thrust/system/tbb/detail/memory.inl>
-
diff --git a/compat/thrust/system/tbb/vector.h b/compat/thrust/system/tbb/vector.h
deleted file mode 100644
index 1c49c3f9e0..0000000000
--- a/compat/thrust/system/tbb/vector.h
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in ctbbliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-/*! \file thrust/system/tbb/vector.h
- *  \brief A dynamically-sizable array of elements which reside in memory available to
- *         Thrust's TBB system.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/system/tbb/memory.h>
-#include <thrust/detail/vector_base.h>
-#include <vector>
-
-namespace thrust
-{
-namespace system
-{
-namespace tbb
-{
-
-// XXX upon c++11
-// template<typename T, typename Allocator = allocator<T> > using vector = thrust::detail::vector_base<T,Allocator>;
-
-/*! \p tbb::vector is a container that supports random access to elements,
- *  constant time removal of elements at the end, and linear time insertion
- *  and removal of elements at the beginning or in the middle. The number of
- *  elements in a \p tbb::vector may vary dynamically; memory management is
- *  automatic. The elements contained in a \p tbb::vector reside in memory
- *  available to the \p tbb system.
- *
- *  \tparam T The element type of the \p tbb::vector.
- *  \tparam Allocator The allocator type of the \p tbb::vector. Defaults to \p tbb::allocator.
- *
- *  \see http://www.sgi.com/tech/stl/Vector.html
- *  \see host_vector For the documentation of the complete interface which is
- *                   shared by \p tbb::vector
- *  \see device_vector
- */
-template<typename T, typename Allocator = allocator<T> >
-  class vector
-    : public thrust::detail::vector_base<T,Allocator>
-{
-  /*! \cond
-   */
-  private:
-    typedef thrust::detail::vector_base<T,Allocator> super_t;
-  /*! \endcond
-   */
-
-  public:
-
-  /*! \cond
-   */
-    typedef typename super_t::size_type  size_type;
-    typedef typename super_t::value_type value_type;
-  /*! \endcond
-   */
-
-    /*! This constructor creates an empty \p tbb::vector.
-     */
-    vector();
-
-    /*! This constructor creates a \p tbb::vector with \p n default-constructed elements.
-     *  \param n The size of the \p tbb::vector to create.
-     */
-    explicit vector(size_type n);
-
-    /*! This constructor creates a \p tbb::vector with \p n copies of \p value.
-     *  \param n The size of the \p tbb::vector to create.
-     *  \param value An element to copy.
-     */
-    explicit vector(size_type n, const value_type &value);
-
-    /*! Copy constructor copies from another \p tbb::vector.
-     *  \param x The other \p tbb::vector to copy.
-     */
-    vector(const vector &x);
-
-    /*! This constructor copies from another Thrust vector-like object.
-     *  \param x The other object to copy from.
-     */
-    template<typename OtherT, typename OtherAllocator>
-    vector(const thrust::detail::vector_base<OtherT,OtherAllocator> &x);
-
-    /*! This constructor copies from a \c std::vector.
-     *  \param x The \c std::vector to copy from.
-     */
-    template<typename OtherT, typename OtherAllocator>
-    vector(const std::vector<OtherT,OtherAllocator> &x);
-
-    /*! This constructor creates a \p tbb::vector by copying from a range.
-     *  \param first The beginning of the range.
-     *  \param last The end of the range.
-     */
-    template<typename InputIterator>
-    vector(InputIterator first, InputIterator last);
-
-    // XXX vector_base should take a Derived type so we don't have to define these superfluous assigns
-
-    /*! Assignment operator assigns from a \c std::vector.
-     *  \param x The \c std::vector to assign from.
-     *  \return <tt>*this</tt>
-     */
-    template<typename OtherT, typename OtherAllocator>
-    vector &operator=(const std::vector<OtherT,OtherAllocator> &x);
-
-    /*! Assignment operator assigns from another Thrust vector-like object.
-     *  \param x The other object to assign from.
-     *  \return <tt>*this</tt>
-     */
-    template<typename OtherT, typename OtherAllocator>
-    vector &operator=(const thrust::detail::vector_base<OtherT,OtherAllocator> &x);
-}; // end vector
-
-} // end tbb
-} // end system
-
-// alias system::tbb names at top-level
-namespace tbb
-{
-
-using thrust::system::tbb::vector;
-
-} // end tbb
-
-} // end thrust
-
-#include <thrust/system/tbb/detail/vector.inl>
-
diff --git a/compat/thrust/system_error.h b/compat/thrust/system_error.h
deleted file mode 100644
index ce88fe6bd8..0000000000
--- a/compat/thrust/system_error.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-/*! \file thrust/system_error.h
- *  \brief System diagnostics
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-namespace thrust
-{
-
-/*! \addtogroup system System Access
- *  \{
- */
-
-/*! \namespace thrust::system
- *  \brief \p thrust::system is the namespace which contains functionality for manipulating
- *         memory specific to one of Thrust's backend systems. It also contains functionality
- *         for reporting error conditions originating from the operating system or other
- *         low-level application program interfaces such as the CUDA runtime.
- *         They are provided in a separate namespace for import convenience but are
- *         also aliased in the top-level \p thrust namespace for easy access.
- */
-namespace system
-{
-} // end system
-
-/*! \} // end system
- */
-
-} // end thrust
-
-#include <thrust/system/error_code.h>
-#include <thrust/system/system_error.h>
-
diff --git a/compat/thrust/tabulate.h b/compat/thrust/tabulate.h
deleted file mode 100644
index c87edf01ab..0000000000
--- a/compat/thrust/tabulate.h
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file tabulate.h
- *  \brief Fills a range with the tabulation of a function
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/execution_policy.h>
-
-namespace thrust
-{
-
-
-/*! \addtogroup transformations
- *  \{
- */
-
-
-/*! \p tabulate fills the range <tt>[first, last)</tt> with the value of a function applied to each
- *     element's index.
- *
- *  For each iterator \c i in the range <tt>[first, last)</tt>, \p tabulate performs the assignment
- *  <tt>*i = unary_op(i - first)</tt>.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the range.
- *  \param last The end of the range.
- *  \param unary_op The unary operation to apply.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \p ForwardIterator is mutable,
- *          and if \c x and \c y are objects of \c ForwardIterator's \c value_type, then <tt>x + y</tt> is defined,
- *          and if \c T is \p ForwardIterator's \c value_type, then <tt>T(0)</tt> is defined.
- *  \tparam UnaryOperation is a model of <a href="http://www.sgi.com/tech/stl/UnaryFunction.html">Unary Function</a>
- *                         and \c UnaryFunction's \c result_type is convertible to \c OutputIterator's \c value_type.
- *
- *  The following code snippet demonstrates how to use \p tabulate to generate the first \c n non-positive integers
- *  using the \p thrust::host execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/tabulate.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  const int N = 10;
- *  int A[N];
- *  thrust::tabulate(thrust::host, A, A + 10, thrust::negate<int>());
- *  // A is now {0, -1, -2, -3, -4, -5, -6, -7, -8, -9}
- *  \endcode
- *
- *  \see thrust::fill
- *  \see thrust::generate
- *  \see thrust::sequence
- */
-template<typename DerivedPolicy, typename ForwardIterator, typename UnaryOperation>
-  void tabulate(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                ForwardIterator first,
-                ForwardIterator last,
-                UnaryOperation unary_op);
-
-
-/*! \p tabulate fills the range <tt>[first, last)</tt> with the value of a function applied to each
- *     element's index.
- *
- *  For each iterator \c i in the range <tt>[first, last)</tt>, \p tabulate performs the assignment
- *  <tt>*i = unary_op(i - first)</tt>.
- *
- *  \param first The beginning of the range.
- *  \param last The end of the range.
- *  \param unary_op The unary operation to apply.
- *
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \p ForwardIterator is mutable,
- *          and if \c x and \c y are objects of \c ForwardIterator's \c value_type, then <tt>x + y</tt> is defined,
- *          and if \c T is \p ForwardIterator's \c value_type, then <tt>T(0)</tt> is defined.
- *  \tparam UnaryOperation is a model of <a href="http://www.sgi.com/tech/stl/UnaryFunction.html">Unary Function</a>
- *                         and \c UnaryFunction's \c result_type is convertible to \c OutputIterator's \c value_type.
- *
- *  The following code snippet demonstrates how to use \p tabulate to generate the first \c n non-positive integers:
- *
- *  \code
- *  #include <thrust/tabulate.h>
- *  #include <thrust/functional.h>
- *  ...
- *  const int N = 10;
- *  int A[N];
- *  thrust::tabulate(A, A + 10, thrust::negate<int>());
- *  // A is now {0, -1, -2, -3, -4, -5, -6, -7, -8, -9}
- *  \endcode
- *
- *  \see thrust::fill
- *  \see thrust::generate
- *  \see thrust::sequence
- */
-template<typename ForwardIterator, typename UnaryOperation>
-  void tabulate(ForwardIterator first,
-                ForwardIterator last,
-                UnaryOperation unary_op);
-
-
-/*! \} // end transformations
- */
-
-
-} // end namespace thrust
-
-#include <thrust/detail/tabulate.inl>
-
diff --git a/compat/thrust/transform.h b/compat/thrust/transform.h
deleted file mode 100644
index 1ada105828..0000000000
--- a/compat/thrust/transform.h
+++ /dev/null
@@ -1,720 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file transform.h
- *  \brief Transforms input ranges using a function object
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/execution_policy.h>
-
-namespace thrust
-{
-
-
-/*! \addtogroup algorithms
- */
-
-/*! \addtogroup transformations
- *  \ingroup algorithms
- *  \{
- */
-
-
-/*! This version of \p transform applies a unary function to each element
- *  of an input sequence and stores the result in the corresponding 
- *  position in an output sequence.  Specifically, for each iterator 
- *  <tt>i</tt> in the range [\p first, \p last) the operation 
- *  <tt>op(*i)</tt> is performed and the result is assigned to <tt>*o</tt>,
- *  where <tt>o</tt> is the corresponding output iterator in the range
- *  [\p result, \p result + (\p last - \p first) ).  The input and
- *  output sequences may coincide, resulting in an in-place transformation.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *    
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the input sequence.
- *  \param last The end of the input sequence.
- *  \param result The beginning of the output sequence.
- *  \param op The transformation operation.
- *  \return The end of the output sequence.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *                        and \c InputIterator's \c value_type is convertible to \c UnaryFunction's \c argument_type.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam UnaryFunction is a model of <a href="http://www.sgi.com/tech/stl/UnaryFunction.html">Unary Function</a>
- *                              and \c UnaryFunction's \c result_type is convertible to \c OutputIterator's \c value_type.
- *
- *  \pre \p first may equal \p result, but the range <tt>[first, last)</tt> shall not overlap the range <tt>[result, result + (last - first))</tt> otherwise.
- *
- *  The following code snippet demonstrates how to use \p transform to negate a range in-place
- *  using the \p thrust::host execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/transform.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  
- *  int data[10] = {-5, 0, 2, -3, 2, 4, 0, -1, 2, 8};
- * 
- *  thrust::negate<int> op;
- *
- *  thrust::transform(thrust::host, data, data + 10, data, op); // in-place transformation
- *
- *  // data is now {5, 0, -2, 3, -2, -4, 0, 1, -2, -8};
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/transform.html
- */
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename UnaryFunction>
-  OutputIterator transform(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                           InputIterator first, InputIterator last,
-                           OutputIterator result,
-                           UnaryFunction op);
-
-	
-/*! This version of \p transform applies a unary function to each element
- *  of an input sequence and stores the result in the corresponding 
- *  position in an output sequence.  Specifically, for each iterator 
- *  <tt>i</tt> in the range [\p first, \p last) the operation 
- *  <tt>op(*i)</tt> is performed and the result is assigned to <tt>*o</tt>,
- *  where <tt>o</tt> is the corresponding output iterator in the range
- *  [\p result, \p result + (\p last - \p first) ).  The input and
- *  output sequences may coincide, resulting in an in-place transformation.
- *    
- *  \param first The beginning of the input sequence.
- *  \param last The end of the input sequence.
- *  \param result The beginning of the output sequence.
- *  \param op The transformation operation.
- *  \return The end of the output sequence.
- *
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *                        and \c InputIterator's \c value_type is convertible to \c UnaryFunction's \c argument_type.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam UnaryFunction is a model of <a href="http://www.sgi.com/tech/stl/UnaryFunction.html">Unary Function</a>
- *                              and \c UnaryFunction's \c result_type is convertible to \c OutputIterator's \c value_type.
- *
- *  \pre \p first may equal \p result, but the range <tt>[first, last)</tt> shall not overlap the range <tt>[result, result + (last - first))</tt> otherwise.
- *
- *  The following code snippet demonstrates how to use \p transform
- *
- *  \code
- *  #include <thrust/transform.h>
- *  #include <thrust/functional.h>
- *  
- *  int data[10] = {-5, 0, 2, -3, 2, 4, 0, -1, 2, 8};
- * 
- *  thrust::negate<int> op;
- *
- *  thrust::transform(data, data + 10, data, op); // in-place transformation
- *
- *  // data is now {5, 0, -2, 3, -2, -4, 0, 1, -2, -8};
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/transform.html
- */
-template<typename InputIterator,
-         typename OutputIterator,
-         typename UnaryFunction>
-  OutputIterator transform(InputIterator first, InputIterator last,
-                           OutputIterator result,
-                           UnaryFunction op);
-
-
-/*! This version of \p transform applies a binary function to each pair
- *  of elements from two input sequences and stores the result in the
- *  corresponding position in an output sequence.  Specifically, for
- *  each iterator <tt>i</tt> in the range [\p first1, \p last1) and 
- *  <tt>j = first2 + (i - first1)</tt> in the range [\p first2, \p first2 + (\p last1 - \p first1) )
- *  the operation <tt>op(*i,*j)</tt> is performed and the result is 
- *  assigned to <tt>*o</tt>,  where <tt>o</tt> is the corresponding
- *  output iterator in the range [\p result, \p result + (\p last1 - \p first1) ).
- *  The input and output sequences may coincide, resulting in an 
- *  in-place transformation.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *    
- *  \param exec The execution policy to use for parallelization.
- *  \param first1 The beginning of the first input sequence.
- *  \param last1 The end of the first input sequence.
- *  \param first2 The beginning of the second input sequence.
- *  \param result The beginning of the output sequence.
- *  \param op The transformation operation.
- *  \return The end of the output sequence.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *                        and \c InputIterator1's \c value_type is convertible to \c BinaryFunction's \c first_argument_type.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *                        and \c InputIterator2's \c value_type is convertible to \c BinaryFunction's \c second_argument_type.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam BinaryFunction is a model of <a href="http://www.sgi.com/tech/stl/BinaryFunction.html">Binary Function</a>
- *                              and \c BinaryFunction's \c result_type is convertible to \c OutputIterator's \c value_type.
- *
- *  \pre \p first1 may equal \p result, but the range <tt>[first1, last1)</tt> shall not overlap the range <tt>[result, result + (last1 - first1))</tt> otherwise.
- *  \pre \p first2 may equal \p result, but the range <tt>[first2, first2 + (last1 - first1))</tt> shall not overlap the range <tt>[result, result + (last1 - first1))</tt> otherwise.
- *
- *  The following code snippet demonstrates how to use \p transform to compute the sum of two
- *  ranges using the \p thrust::host execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/transform.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  
- *  int input1[6] = {-5,  0,  2,  3,  2,  4};
- *  int input2[6] = { 3,  6, -2,  1,  2,  3};
- *  int output[6];
- * 
- *  thrust::plus<int> op;
- *
- *  thrust::transform(thrust::host, input1, input1 + 6, input2, output, op);
- *
- *  // output is now {-2,  6,  0,  4,  4,  7};
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/transform.html
- */
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename BinaryFunction>
-  OutputIterator transform(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                           InputIterator1 first1, InputIterator1 last1,
-                           InputIterator2 first2,
-                           OutputIterator result,
-                           BinaryFunction op);
-
-
-/*! This version of \p transform applies a binary function to each pair
- *  of elements from two input sequences and stores the result in the
- *  corresponding position in an output sequence.  Specifically, for
- *  each iterator <tt>i</tt> in the range [\p first1, \p last1) and 
- *  <tt>j = first2 + (i - first1)</tt> in the range [\p first2, \p first2 + (\p last1 - \p first1) )
- *  the operation <tt>op(*i,*j)</tt> is performed and the result is 
- *  assigned to <tt>*o</tt>,  where <tt>o</tt> is the corresponding
- *  output iterator in the range [\p result, \p result + (\p last1 - \p first1) ).
- *  The input and output sequences may coincide, resulting in an 
- *  in-place transformation.
- *    
- *  \param first1 The beginning of the first input sequence.
- *  \param last1 The end of the first input sequence.
- *  \param first2 The beginning of the second input sequence.
- *  \param result The beginning of the output sequence.
- *  \param op The transformation operation.
- *  \return The end of the output sequence.
- *
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *                        and \c InputIterator1's \c value_type is convertible to \c BinaryFunction's \c first_argument_type.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *                        and \c InputIterator2's \c value_type is convertible to \c BinaryFunction's \c second_argument_type.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam BinaryFunction is a model of <a href="http://www.sgi.com/tech/stl/BinaryFunction.html">Binary Function</a>
- *                              and \c BinaryFunction's \c result_type is convertible to \c OutputIterator's \c value_type.
- *
- *  \pre \p first1 may equal \p result, but the range <tt>[first1, last1)</tt> shall not overlap the range <tt>[result, result + (last1 - first1))</tt> otherwise.
- *  \pre \p first2 may equal \p result, but the range <tt>[first2, first2 + (last1 - first1))</tt> shall not overlap the range <tt>[result, result + (last1 - first1))</tt> otherwise.
- *
- *  The following code snippet demonstrates how to use \p transform
- *
- *  \code
- *  #include <thrust/transform.h>
- *  #include <thrust/functional.h>
- *  
- *  int input1[6] = {-5,  0,  2,  3,  2,  4};
- *  int input2[6] = { 3,  6, -2,  1,  2,  3};
- *  int output[6];
- * 
- *  thrust::plus<int> op;
- *
- *  thrust::transform(input1, input1 + 6, input2, output, op);
- *
- *  // output is now {-2,  6,  0,  4,  4,  7};
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/transform.html
- */
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator,
-         typename BinaryFunction>
-  OutputIterator transform(InputIterator1 first1, InputIterator1 last1,
-                           InputIterator2 first2,
-                           OutputIterator result,
-                           BinaryFunction op);
-
-
-/*! This version of \p transform_if conditionally applies a unary function
- *  to each element of an input sequence and stores the result in the corresponding 
- *  position in an output sequence if the corresponding position in the input sequence
- *  satisfies a predicate. Otherwise, the corresponding position in the
- *  output sequence is not modified.
- *
- *  Specifically, for each iterator <tt>i</tt> in the range <tt>[first, last)</tt> the
- *  predicate <tt>pred(*i)</tt> is evaluated. If this predicate
- *  evaluates to \c true, the result of <tt>op(*i)</tt> is assigned to <tt>*o</tt>,
- *  where <tt>o</tt> is the corresponding output iterator in the range
- *  <tt>[result, result + (last - first) )</tt>. Otherwise, <tt>op(*i)</tt> is
- *  not evaluated and no assignment occurs. The input and output sequences may coincide,
- *  resulting in an in-place transformation.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *    
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the input sequence.
- *  \param last The end of the input sequence.
- *  \param result The beginning of the output sequence.
- *  \param op The transformation operation.
- *  \param pred The predicate operation.
- *  \return The end of the output sequence.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *                        and \c InputIterator's \c value_type is convertible to \c Predicate's \c argument_type,
- *                        and \c InputIterator's \c value_type is convertible to \c UnaryFunction's \c argument_type.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>.
- *  \tparam UnaryFunction is a model of <a href="http://www.sgi.com/tech/stl/UnaryFunction.html">Unary Function</a>
- *                        and \c UnaryFunction's \c result_type is convertible to \c ForwardIterator's \c value_type.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *
- *  \pre \p first may equal \p result, but the range <tt>[first, last)</tt> shall not overlap the range <tt>[result, result + (last - first))</tt> otherwise.
- *
- *  The following code snippet demonstrates how to use \p transform_if to negate the odd-valued
- *  elements of a range using the \p thrust::host execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/transform.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  
- *  int data[10]    = {-5, 0, 2, -3, 2, 4, 0, -1, 2, 8};
- *
- *  struct is_odd
- *  {
- *    __host__ __device__
- *    bool operator()(int x)
- *    {
- *      return x % 2;
- *    }
- *  };
- * 
- *  thrust::negate<int> op;
- *  thrust::identity<int> identity;
- *
- *  // negate odd elements
- *  thrust::transform_if(thrust::host, data, data + 10, data, op, is_odd()); // in-place transformation
- *
- *  // data is now {5, 0, 2, 3, 2, 4, 0, 1, 2, 8};
- *  \endcode
- *
- *  \see thrust::transform
- */
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename ForwardIterator,
-         typename UnaryFunction,
-         typename Predicate>
-  ForwardIterator transform_if(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                               InputIterator first, InputIterator last,
-                               ForwardIterator result,
-                               UnaryFunction op,
-                               Predicate pred);
-
-
-/*! This version of \p transform_if conditionally applies a unary function
- *  to each element of an input sequence and stores the result in the corresponding 
- *  position in an output sequence if the corresponding position in the input sequence
- *  satisfies a predicate. Otherwise, the corresponding position in the
- *  output sequence is not modified.
- *
- *  Specifically, for each iterator <tt>i</tt> in the range <tt>[first, last)</tt> the
- *  predicate <tt>pred(*i)</tt> is evaluated. If this predicate
- *  evaluates to \c true, the result of <tt>op(*i)</tt> is assigned to <tt>*o</tt>,
- *  where <tt>o</tt> is the corresponding output iterator in the range
- *  <tt>[result, result + (last - first) )</tt>. Otherwise, <tt>op(*i)</tt> is
- *  not evaluated and no assignment occurs. The input and output sequences may coincide,
- *  resulting in an in-place transformation.
- *    
- *  \param first The beginning of the input sequence.
- *  \param last The end of the input sequence.
- *  \param result The beginning of the output sequence.
- *  \param op The transformation operation.
- *  \param pred The predicate operation.
- *  \return The end of the output sequence.
- *
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *                        and \c InputIterator's \c value_type is convertible to \c Predicate's \c argument_type,
- *                        and \c InputIterator's \c value_type is convertible to \c UnaryFunction's \c argument_type.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>.
- *  \tparam UnaryFunction is a model of <a href="http://www.sgi.com/tech/stl/UnaryFunction.html">Unary Function</a>
- *                        and \c UnaryFunction's \c result_type is convertible to \c ForwardIterator's \c value_type.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *
- *  \pre \p first may equal \p result, but the range <tt>[first, last)</tt> shall not overlap the range <tt>[result, result + (last - first))</tt> otherwise.
- *
- *  The following code snippet demonstrates how to use \p transform_if:
- *
- *  \code
- *  #include <thrust/transform.h>
- *  #include <thrust/functional.h>
- *  
- *  int data[10]    = {-5, 0, 2, -3, 2, 4, 0, -1, 2, 8};
- *
- *  struct is_odd
- *  {
- *    __host__ __device__
- *    bool operator()(int x)
- *    {
- *      return x % 2;
- *    }
- *  };
- * 
- *  thrust::negate<int> op;
- *  thrust::identity<int> identity;
- *
- *  // negate odd elements
- *  thrust::transform_if(data, data + 10, data, op, is_odd()); // in-place transformation
- *
- *  // data is now {5, 0, 2, 3, 2, 4, 0, 1, 2, 8};
- *  \endcode
- *
- *  \see thrust::transform
- */
-template<typename InputIterator,
-         typename ForwardIterator,
-         typename UnaryFunction,
-         typename Predicate>
-  ForwardIterator transform_if(InputIterator first, InputIterator last,
-                               ForwardIterator result,
-                               UnaryFunction op,
-                               Predicate pred);
-
-
-/*! This version of \p transform_if conditionally applies a unary function
- *  to each element of an input sequence and stores the result in the corresponding 
- *  position in an output sequence if the corresponding position in a stencil sequence
- *  satisfies a predicate. Otherwise, the corresponding position in the
- *  output sequence is not modified.
- *
- *  Specifically, for each iterator <tt>i</tt> in the range <tt>[first, last)</tt> the
- *  predicate <tt>pred(*s)</tt> is evaluated, where <tt>s</tt> is the corresponding input
- *  iterator in the range <tt>[stencil, stencil + (last - first) )</tt>. If this predicate
- *  evaluates to \c true, the result of <tt>op(*i)</tt> is assigned to <tt>*o</tt>,
- *  where <tt>o</tt> is the corresponding output iterator in the range
- *  <tt>[result, result + (last - first) )</tt>. Otherwise, <tt>op(*i)</tt> is
- *  not evaluated and no assignment occurs. The input and output sequences may coincide,
- *  resulting in an in-place transformation.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *    
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the input sequence.
- *  \param last The end of the input sequence.
- *  \param stencil The beginning of the stencil sequence.
- *  \param result The beginning of the output sequence.
- *  \param op The transformation operation.
- *  \param pred The predicate operation.
- *  \return The end of the output sequence.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *                         and \c InputIterator1's \c value_type is convertible to \c UnaryFunction's \c argument_type.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *                         and \c InputIterator2's \c value_type is convertible to \c Predicate's \c argument_type.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>.
- *  \tparam UnaryFunction is a model of <a href="http://www.sgi.com/tech/stl/UnaryFunction.html">Unary Function</a>
- *                        and \c UnaryFunction's \c result_type is convertible to \c ForwardIterator's \c value_type.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *
- *  \pre \p first may equal \p result, but the range <tt>[first, last)</tt> shall not overlap the range <tt>[result, result + (last - first))</tt> otherwise.
- *  \pre \p stencil may equal \p result, but the range <tt>[stencil, stencil + (last - first))</tt> shall not overlap the range <tt>[result, result + (last - first))</tt> otherwise.
- *
- *  The following code snippet demonstrates how to use \p transform_if using the \p thrust::host
- *  execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/transform.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  
- *  int data[10]    = {-5, 0, 2, -3, 2, 4, 0, -1, 2, 8};
- *  int stencil[10] = { 1, 0, 1,  0, 1, 0, 1,  0, 1, 0};
- * 
- *  thrust::negate<int> op;
- *  thrust::identity<int> identity;
- *
- *  thrust::transform_if(thrust::host, data, data + 10, stencil, data, op, identity); // in-place transformation
- *
- *  // data is now {5, 0, -2, -3, -2,  4, 0, -1, -2,  8};
- *  \endcode
- *
- *  \see thrust::transform
- */
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename ForwardIterator,
-         typename UnaryFunction,
-         typename Predicate>
-  ForwardIterator transform_if(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                               InputIterator1 first, InputIterator1 last,
-                               InputIterator2 stencil,
-                               ForwardIterator result,
-                               UnaryFunction op,
-                               Predicate pred);
-
-
-/*! This version of \p transform_if conditionally applies a unary function
- *  to each element of an input sequence and stores the result in the corresponding 
- *  position in an output sequence if the corresponding position in a stencil sequence
- *  satisfies a predicate. Otherwise, the corresponding position in the
- *  output sequence is not modified.
- *
- *  Specifically, for each iterator <tt>i</tt> in the range <tt>[first, last)</tt> the
- *  predicate <tt>pred(*s)</tt> is evaluated, where <tt>s</tt> is the corresponding input
- *  iterator in the range <tt>[stencil, stencil + (last - first) )</tt>. If this predicate
- *  evaluates to \c true, the result of <tt>op(*i)</tt> is assigned to <tt>*o</tt>,
- *  where <tt>o</tt> is the corresponding output iterator in the range
- *  <tt>[result, result + (last - first) )</tt>. Otherwise, <tt>op(*i)</tt> is
- *  not evaluated and no assignment occurs. The input and output sequences may coincide,
- *  resulting in an in-place transformation.
- *    
- *  \param first The beginning of the input sequence.
- *  \param last The end of the input sequence.
- *  \param stencil The beginning of the stencil sequence.
- *  \param result The beginning of the output sequence.
- *  \param op The transformation operation.
- *  \param pred The predicate operation.
- *  \return The end of the output sequence.
- *
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *                         and \c InputIterator1's \c value_type is convertible to \c UnaryFunction's \c argument_type.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *                         and \c InputIterator2's \c value_type is convertible to \c Predicate's \c argument_type.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>.
- *  \tparam UnaryFunction is a model of <a href="http://www.sgi.com/tech/stl/UnaryFunction.html">Unary Function</a>
- *                        and \c UnaryFunction's \c result_type is convertible to \c ForwardIterator's \c value_type.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *
- *  \pre \p first may equal \p result, but the range <tt>[first, last)</tt> shall not overlap the range <tt>[result, result + (last - first))</tt> otherwise.
- *  \pre \p stencil may equal \p result, but the range <tt>[stencil, stencil + (last - first))</tt> shall not overlap the range <tt>[result, result + (last - first))</tt> otherwise.
- *
- *  The following code snippet demonstrates how to use \p transform_if:
- *
- *  \code
- *  #include <thrust/transform.h>
- *  #include <thrust/functional.h>
- *  
- *  int data[10]    = {-5, 0, 2, -3, 2, 4, 0, -1, 2, 8};
- *  int stencil[10] = { 1, 0, 1,  0, 1, 0, 1,  0, 1, 0};
- * 
- *  thrust::negate<int> op;
- *  thrust::identity<int> identity;
- *
- *  thrust::transform_if(data, data + 10, stencil, data, op, identity); // in-place transformation
- *
- *  // data is now {5, 0, -2, -3, -2,  4, 0, -1, -2,  8};
- *  \endcode
- *
- *  \see thrust::transform
- */
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename ForwardIterator,
-         typename UnaryFunction,
-         typename Predicate>
-  ForwardIterator transform_if(InputIterator1 first, InputIterator1 last,
-                               InputIterator2 stencil,
-                               ForwardIterator result,
-                               UnaryFunction op,
-                               Predicate pred);
-
-
-/*! This version of \p transform_if conditionally applies a binary function
- *  to each pair of elements from two input sequences and stores the result in the corresponding 
- *  position in an output sequence if the corresponding position in a stencil sequence
- *  satisfies a predicate. Otherwise, the corresponding position in the
- *  output sequence is not modified.
- *
- *  Specifically, for each iterator <tt>i</tt> in the range <tt>[first1, last1)</tt> and 
- *  <tt>j = first2 + (i - first1)</tt> in the range <tt>[first2, first2 + (last1 - first1) )</tt>,
- *  the predicate <tt>pred(*s)</tt> is evaluated, where <tt>s</tt> is the corresponding input
- *  iterator in the range <tt>[stencil, stencil + (last1 - first1) )</tt>. If this predicate
- *  evaluates to \c true, the result of <tt>binary_op(*i,*j)</tt> is assigned to <tt>*o</tt>,
- *  where <tt>o</tt> is the corresponding output iterator in the range
- *  <tt>[result, result + (last1 - first1) )</tt>. Otherwise, <tt>binary_op(*i,*j)</tt> is
- *  not evaluated and no assignment occurs. The input and output sequences may coincide,
- *  resulting in an in-place transformation.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *    
- *  \param exec The execution policy to use for parallelization.
- *  \param first1 The beginning of the first input sequence.
- *  \param last1 The end of the first input sequence.
- *  \param first2 The beginning of the second input sequence.
- *  \param stencil The beginning of the stencil sequence.
- *  \param result The beginning of the output sequence.
- *  \param binary_op The transformation operation.
- *  \param pred The predicate operation.
- *  \return The end of the output sequence.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *                         and \c InputIterator1's \c value_type is convertible to \c BinaryFunction's \c first_argument_type.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *                         and \c InputIterator2's \c value_type is convertible to \c BinaryFunction's \c second_argument_type.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>.
- *  \tparam BinaryFunction is a model of <a href="http://www.sgi.com/tech/stl/BinaryFunction.html">Binary Function</a>
- *                         and \c BinaryFunction's \c result_type is convertible to \c ForwardIterator's \c value_type.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *
- *  \pre \p first1 may equal \p result, but the range <tt>[first1, last1)</tt> shall not overlap the range <tt>[result, result + (last1 - first1))</tt> otherwise.
- *  \pre \p first2 may equal \p result, but the range <tt>[first2, first2 + (last1 - first1))</tt> shall not overlap the range <tt>[result, result + (last1 - first1))</tt> otherwise.
- *  \pre \p stencil may equal \p result, but the range <tt>[stencil, stencil + (last1 - first1))</tt> shall not overlap the range <tt>[result, result + (last1 - first1))</tt> otherwise.
- *
- *  The following code snippet demonstrates how to use \p transform_if using the \p thrust::host
- *  execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/transform.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  
- *  int input1[6]  = {-5,  0,  2,  3,  2,  4};
- *  int input2[6]  = { 3,  6, -2,  1,  2,  3};
- *  int stencil[6] = { 1,  0,  1,  0,  1,  0};
- *  int output[6];
- * 
- *  thrust::plus<int> op;
- *  thrust::identity<int> identity;
- *
- *  thrust::transform_if(thrust::host, input1, input1 + 6, input2, stencil, output, op, identity);
- *
- *  // output[0], output[2] and output[4] are now -2, 0 and 4; output[1], output[3] and output[5] are left unmodified
- *  \endcode
- *
- *  \see thrust::transform
- */
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename ForwardIterator,
-         typename BinaryFunction,
-         typename Predicate>
-  ForwardIterator transform_if(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                               InputIterator1 first1, InputIterator1 last1,
-                               InputIterator2 first2,
-                               InputIterator3 stencil,
-                               ForwardIterator result,
-                               BinaryFunction binary_op,
-                               Predicate pred);
-
-
-/*! This version of \p transform_if conditionally applies a binary function
- *  to each pair of elements from two input sequences and stores the result in the corresponding 
- *  position in an output sequence if the corresponding position in a stencil sequence
- *  satisfies a predicate. Otherwise, the corresponding position in the
- *  output sequence is not modified.
- *
- *  Specifically, for each iterator <tt>i</tt> in the range <tt>[first1, last1)</tt> and 
- *  <tt>j = first2 + (i - first1)</tt> in the range <tt>[first2, first2 + (last1 - first1) )</tt>,
- *  the predicate <tt>pred(*s)</tt> is evaluated, where <tt>s</tt> is the corresponding input
- *  iterator in the range <tt>[stencil, stencil + (last1 - first1) )</tt>. If this predicate
- *  evaluates to \c true, the result of <tt>binary_op(*i,*j)</tt> is assigned to <tt>*o</tt>,
- *  where <tt>o</tt> is the corresponding output iterator in the range
- *  <tt>[result, result + (last1 - first1) )</tt>. Otherwise, <tt>binary_op(*i,*j)</tt> is
- *  not evaluated and no assignment occurs. The input and output sequences may coincide,
- *  resulting in an in-place transformation.
- *    
- *  \param first1 The beginning of the first input sequence.
- *  \param last1 The end of the first input sequence.
- *  \param first2 The beginning of the second input sequence.
- *  \param stencil The beginning of the stencil sequence.
- *  \param result The beginning of the output sequence.
- *  \param binary_op The transformation operation.
- *  \param pred The predicate operation.
- *  \return The end of the output sequence.
- *
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *                         and \c InputIterator1's \c value_type is convertible to \c BinaryFunction's \c first_argument_type.
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *                         and \c InputIterator2's \c value_type is convertible to \c BinaryFunction's \c second_argument_type.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>.
- *  \tparam BinaryFunction is a model of <a href="http://www.sgi.com/tech/stl/BinaryFunction.html">Binary Function</a>
- *                         and \c BinaryFunction's \c result_type is convertible to \c ForwardIterator's \c value_type.
- *  \tparam Predicate is a model of <a href="http://www.sgi.com/tech/stl/Predicate.html">Predicate</a>.
- *
- *  \pre \p first1 may equal \p result, but the range <tt>[first1, last1)</tt> shall not overlap the range <tt>[result, result + (last1 - first1))</tt> otherwise.
- *  \pre \p first2 may equal \p result, but the range <tt>[first2, first2 + (last1 - first1))</tt> shall not overlap the range <tt>[result, result + (last1 - first1))</tt> otherwise.
- *  \pre \p stencil may equal \p result, but the range <tt>[stencil, stencil + (last1 - first1))</tt> shall not overlap the range <tt>[result, result + (last1 - first1))</tt> otherwise.
- *
- *  The following code snippet demonstrates how to use \p transform_if:
- *
- *  \code
- *  #include <thrust/transform.h>
- *  #include <thrust/functional.h>
- *  
- *  int input1[6]  = {-5,  0,  2,  3,  2,  4};
- *  int input2[6]  = { 3,  6, -2,  1,  2,  3};
- *  int stencil[6] = { 1,  0,  1,  0,  1,  0};
- *  int output[6];
- * 
- *  thrust::plus<int> op;
- *  thrust::identity<int> identity;
- *
- *  thrust::transform_if(input1, input1 + 6, input2, stencil, output, op, identity);
- *
- *  // output[0], output[2] and output[4] are now -2, 0 and 4; the elements whose stencil value is 0 are left unmodified
- *  \endcode
- *
- *  \see thrust::transform
- */
-template<typename InputIterator1,
-         typename InputIterator2,
-         typename InputIterator3,
-         typename ForwardIterator,
-         typename BinaryFunction,
-         typename Predicate>
-  ForwardIterator transform_if(InputIterator1 first1, InputIterator1 last1,
-                               InputIterator2 first2,
-                               InputIterator3 stencil,
-                               ForwardIterator result,
-                               BinaryFunction binary_op,
-                               Predicate pred);
-
-
-/*! \} // end transformations
- */
-
-
-} // end namespace thrust
-
-#include <thrust/detail/transform.inl>
-
diff --git a/compat/thrust/transform_reduce.h b/compat/thrust/transform_reduce.h
deleted file mode 100644
index 3ef5efd6aa..0000000000
--- a/compat/thrust/transform_reduce.h
+++ /dev/null
@@ -1,197 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file transform_reduce.h
- *  \brief Fused transform / reduction
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/execution_policy.h>
-
-namespace thrust
-{
-
-
-/*! \addtogroup reductions
- *  \{
- *  \addtogroup transformed_reductions Transformed Reductions
- *  \ingroup reductions
- *  \{
- */
-
-
-/*! \p transform_reduce fuses the \p transform and \p reduce operations.
- *  \p transform_reduce is equivalent to performing a transformation defined by
- *  \p unary_op into a temporary sequence and then performing \p reduce on the
- *  transformed sequence. In most cases, fusing these two operations together is
- *  more efficient, since fewer memory reads and writes are required.
- *
- *  \p transform_reduce performs a reduction on the transformation of the
- *  sequence <tt>[first, last)</tt> according to \p unary_op. Specifically,
- *  \p unary_op is applied to each element of the sequence and then the result
- *  is reduced to a single value with \p binary_op using the initial value 
- *  \p init.  Note that the transformation \p unary_op is not applied to 
- *  the initial value \p init.  The order of reduction is not specified, 
- *  so \p binary_op must be both commutative and associative. 
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the sequence.
- *  \param last The end of the sequence.
- *  \param unary_op The function to apply to each element of the input sequence.
- *  \param init The result is initialized to this value.
- *  \param binary_op The reduction operation.
- *  \return The result of the transformed reduction.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator's \c value_type is convertible to \p UnaryFunction's \c argument_type.
- *  \tparam UnaryFunction is a model of <a href="http://www.sgi.com/tech/stl/UnaryFunction.html">Unary Function</a>,
- *          and \p UnaryFunction's \c result_type is convertible to \c OutputType.
- *  \tparam OutputType is a model of <a href="http://www.sgi.com/tech/stl/Assignable.html">Assignable</a>,
- *          and is convertible to \p BinaryFunction's \c first_argument_type and \c second_argument_type.
- *  \tparam BinaryFunction is a model of <a href="http://www.sgi.com/tech/stl/BinaryFunction.html">Binary Function</a>,
- *          and \p BinaryFunction's \c result_type is convertible to \p OutputType.
- *
- *  The following code snippet demonstrates how to use \p transform_reduce
- *  to compute the maximum value of the absolute value of the elements
- *  of a range using the \p thrust::host execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/transform_reduce.h>
- *  #include <thrust/functional.h>
- *  #include <thrust/execution_policy.h>
- *
- *  template<typename T>
- *  struct absolute_value : public thrust::unary_function<T,T>
- *  {
- *    __host__ __device__ T operator()(const T &x) const
- *    {
- *      return x < T(0) ? -x : x;
- *    }
- *  };
- *
- *  ...
- *
- *  int data[6] = {-1, 0, -2, -2, 1, -3};
- *  int result = thrust::transform_reduce(thrust::host,
- *                                        data, data + 6,
- *                                        absolute_value<int>(),
- *                                        0,
- *                                        thrust::maximum<int>());
- *  // result == 3
- *  \endcode
- *
- *  \see \c transform
- *  \see \c reduce
- */
-template<typename DerivedPolicy,
-         typename InputIterator, 
-         typename UnaryFunction, 
-         typename OutputType,
-         typename BinaryFunction>
-  OutputType transform_reduce(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                              InputIterator first,
-                              InputIterator last,
-                              UnaryFunction unary_op,
-                              OutputType init,
-                              BinaryFunction binary_op);
-
-
-/*! \p transform_reduce fuses the \p transform and \p reduce operations.
- *  \p transform_reduce is equivalent to performing a transformation defined by
- *  \p unary_op into a temporary sequence and then performing \p reduce on the
- *  transformed sequence. In most cases, fusing these two operations together is
- *  more efficient, since fewer memory reads and writes are required.
- *
- *  \p transform_reduce performs a reduction on the transformation of the
- *  sequence <tt>[first, last)</tt> according to \p unary_op. Specifically,
- *  \p unary_op is applied to each element of the sequence and then the result
- *  is reduced to a single value with \p binary_op using the initial value 
- *  \p init.  Note that the transformation \p unary_op is not applied to 
- *  the initial value \p init.  The order of reduction is not specified, 
- *  so \p binary_op must be both commutative and associative. 
- *
- *  \param first The beginning of the sequence.
- *  \param last The end of the sequence.
- *  \param unary_op The function to apply to each element of the input sequence.
- *  \param init The result is initialized to this value.
- *  \param binary_op The reduction operation.
- *  \return The result of the transformed reduction.
- *
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator's \c value_type is convertible to \p UnaryFunction's \c argument_type.
- *  \tparam UnaryFunction is a model of <a href="http://www.sgi.com/tech/stl/UnaryFunction.html">Unary Function</a>,
- *          and \p UnaryFunction's \c result_type is convertible to \c OutputType.
- *  \tparam OutputType is a model of <a href="http://www.sgi.com/tech/stl/Assignable.html">Assignable</a>,
- *          and is convertible to \p BinaryFunction's \c first_argument_type and \c second_argument_type.
- *  \tparam BinaryFunction is a model of <a href="http://www.sgi.com/tech/stl/BinaryFunction.html">Binary Function</a>,
- *          and \p BinaryFunction's \c result_type is convertible to \p OutputType.
- *
- *  The following code snippet demonstrates how to use \p transform_reduce
- *  to compute the maximum value of the absolute value of the elements
- *  of a range.
- *
- *  \code
- *  #include <thrust/transform_reduce.h>
- *  #include <thrust/functional.h>
- *
- *  template<typename T>
- *  struct absolute_value : public thrust::unary_function<T,T>
- *  {
- *    __host__ __device__ T operator()(const T &x) const
- *    {
- *      return x < T(0) ? -x : x;
- *    }
- *  };
- *
- *  ...
- *
- *  int data[6] = {-1, 0, -2, -2, 1, -3};
- *  int result = thrust::transform_reduce(data, data + 6,
- *                                        absolute_value<int>(),
- *                                        0,
- *                                        thrust::maximum<int>());
- *  // result == 3
- *  \endcode
- *
- *  \see \c transform
- *  \see \c reduce
- */
-template<typename InputIterator, 
-         typename UnaryFunction, 
-         typename OutputType,
-         typename BinaryFunction>
-  OutputType transform_reduce(InputIterator first,
-                              InputIterator last,
-                              UnaryFunction unary_op,
-                              OutputType init,
-                              BinaryFunction binary_op);
-
-
-/*! \} // end transformed_reductions
- *  \} // end reductions
- */
-
-
-} // end namespace thrust
-
-#include <thrust/detail/transform_reduce.inl>
-
diff --git a/compat/thrust/transform_scan.h b/compat/thrust/transform_scan.h
deleted file mode 100644
index e9943e401f..0000000000
--- a/compat/thrust/transform_scan.h
+++ /dev/null
@@ -1,322 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file transform_scan.h
- *  \brief Fused transform / prefix-sum
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/execution_policy.h>
-
-namespace thrust
-{
-
-
-/*! \addtogroup algorithms
- */
-
-/*! \addtogroup prefixsums Prefix Sums
- *  \ingroup algorithms
- *  \{
- */
-	
-/*! \addtogroup transformed_prefixsums Transformed Prefix Sums
- *  \ingroup prefixsums
- *  \{
- */
-
-
-/*! \p transform_inclusive_scan fuses the \p transform and \p inclusive_scan
- *  operations.  \p transform_inclusive_scan is equivalent to performing a
- *  transformation defined by \p unary_op into a temporary sequence and then
- *  performing an \p inclusive_scan on the transformed sequence.  In most
- *  cases, fusing these two operations together is more efficient, since
- *  fewer memory reads and writes are required. In \p transform_inclusive_scan,
- *  <tt>unary_op(\*first)</tt> is assigned to <tt>\*result</tt> and the result
- *  of <tt>binary_op(unary_op(\*first), unary_op(\*(first + 1)))</tt> is
- *  assigned to <tt>\*(result + 1)</tt>, and so on.  The transform scan
- *  operation is permitted to be in-place.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the input sequence.
- *  \param last The end of the input sequence.
- *  \param result The beginning of the output sequence.
- *  \param unary_op The function used to transform the input sequence.
- *  \param binary_op The associative operator used to 'sum' transformed values.
- *  \return The end of the output sequence.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *                               and \c InputIterator's \c value_type is convertible to \c unary_op's input type.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam UnaryFunction is a model of <a href="http://www.sgi.com/tech/stl/UnaryFunction.html">Unary Function</a>
- *                               and accepts inputs of \c InputIterator's \c value_type.  \c UnaryFunction's result_type
- *                               is convertible to \c OutputIterator's \c value_type.
- *  \tparam AssociativeOperator is a model of <a href="http://www.sgi.com/tech/stl/BinaryFunction.html">Binary Function</a>
- *                              and \c AssociativeOperator's \c result_type is
- *                              convertible to \c OutputIterator's \c value_type.
- *
- *  \pre \p first may equal \p result, but the range <tt>[first, last)</tt> and the range <tt>[result, result + (last - first))</tt> shall not overlap otherwise.
- *
- *  The following code snippet demonstrates how to use \p transform_inclusive_scan using the
- *  \p thrust::host execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/transform_scan.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  
- *  int data[6] = {1, 0, 2, 2, 1, 3};
- *
- *  thrust::negate<int> unary_op;
- *  thrust::plus<int> binary_op;
- *
- *  thrust::transform_inclusive_scan(thrust::host, data, data + 6, data, unary_op, binary_op); // in-place scan
- *
- *  // data is now {-1, -1, -3, -5, -6, -9}
- *  \endcode
- *
- *  \see \p transform
- *  \see \p inclusive_scan
- *
- */
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename UnaryFunction,
-         typename AssociativeOperator>
-  OutputIterator transform_inclusive_scan(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                          InputIterator first,
-                                          InputIterator last,
-                                          OutputIterator result,
-                                          UnaryFunction unary_op,
-                                          AssociativeOperator binary_op);
-
-
-/*! \p transform_inclusive_scan fuses the \p transform and \p inclusive_scan
- *  operations.  \p transform_inclusive_scan is equivalent to performing a
- *  transformation defined by \p unary_op into a temporary sequence and then
- *  performing an \p inclusive_scan on the transformed sequence.  In most
- *  cases, fusing these two operations together is more efficient, since
- *  fewer memory reads and writes are required. In \p transform_inclusive_scan,
- *  <tt>unary_op(\*first)</tt> is assigned to <tt>\*result</tt> and the result
- *  of <tt>binary_op(unary_op(\*first), unary_op(\*(first + 1)))</tt> is
- *  assigned to <tt>\*(result + 1)</tt>, and so on.  The transform scan
- *  operation is permitted to be in-place.
- *
- *  \param first The beginning of the input sequence.
- *  \param last The end of the input sequence.
- *  \param result The beginning of the output sequence.
- *  \param unary_op The function used to transform the input sequence.
- *  \param binary_op The associative operator used to 'sum' transformed values.
- *  \return The end of the output sequence.
- *
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *                               and \c InputIterator's \c value_type is convertible to \c unary_op's input type.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam UnaryFunction is a model of <a href="http://www.sgi.com/tech/stl/UnaryFunction.html">Unary Function</a>
- *                               and accepts inputs of \c InputIterator's \c value_type.  \c UnaryFunction's result_type
- *                               is convertible to \c OutputIterator's \c value_type.
- *  \tparam AssociativeOperator is a model of <a href="http://www.sgi.com/tech/stl/BinaryFunction.html">Binary Function</a>
- *                              and \c AssociativeOperator's \c result_type is
- *                              convertible to \c OutputIterator's \c value_type.
- *
- *  \pre \p first may equal \p result, but the range <tt>[first, last)</tt> and the range <tt>[result, result + (last - first))</tt> shall not overlap otherwise.
- *
- *  The following code snippet demonstrates how to use \p transform_inclusive_scan
- *
- *  \code
- *  #include <thrust/transform_scan.h>
- *  
- *  int data[6] = {1, 0, 2, 2, 1, 3};
- *
- *  thrust::negate<int> unary_op;
- *  thrust::plus<int> binary_op;
- *
- *  thrust::transform_inclusive_scan(data, data + 6, data, unary_op, binary_op); // in-place scan
- *
- *  // data is now {-1, -1, -3, -5, -6, -9}
- *  \endcode
- *
- *  \see \p transform
- *  \see \p inclusive_scan
- *
- */
-template<typename InputIterator,
-         typename OutputIterator,
-         typename UnaryFunction,
-         typename AssociativeOperator>
-  OutputIterator transform_inclusive_scan(InputIterator first,
-                                          InputIterator last,
-                                          OutputIterator result,
-                                          UnaryFunction unary_op,
-                                          AssociativeOperator binary_op);
-
-
-/*! \p transform_exclusive_scan fuses the \p transform and \p exclusive_scan
- *  operations.  \p transform_exclusive_scan is equivalent to performing a
- *  transformation defined by \p unary_op into a temporary sequence and then
- *  performing an \p exclusive_scan on the transformed sequence.  In most
- *  cases, fusing these two operations together is more efficient, since
- *  fewer memory reads and writes are required. In 
- *  \p transform_exclusive_scan, \p init is assigned to <tt>\*result</tt> 
- *  and the result of <tt>binary_op(init, unary_op(\*first))</tt> is assigned
- *  to <tt>\*(result + 1)</tt>, and so on.  The transform scan operation is 
- *  permitted to be in-place.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the input sequence.
- *  \param last The end of the input sequence.
- *  \param result The beginning of the output sequence.
- *  \param unary_op The function used to transform the input sequence.
- *  \param init The initial value of the \p exclusive_scan.
- *  \param binary_op The associative operator used to 'sum' transformed values.
- *  \return The end of the output sequence.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *                               and \c InputIterator's \c value_type is convertible to \c unary_op's input type.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam UnaryFunction is a model of <a href="http://www.sgi.com/tech/stl/UnaryFunction.html">Unary Function</a>
- *                               and accepts inputs of \c InputIterator's \c value_type.  \c UnaryFunction's result_type
- *                               is convertible to \c OutputIterator's \c value_type.
- *  \tparam T is convertible to \c OutputIterator's \c value_type.
- *  \tparam AssociativeOperator is a model of <a href="http://www.sgi.com/tech/stl/BinaryFunction.html">Binary Function</a>
- *                              and \c AssociativeOperator's \c result_type is
- *                              convertible to \c OutputIterator's \c value_type.
- *
- *  \pre \p first may equal \p result, but the range <tt>[first, last)</tt> and the range <tt>[result, result + (last - first))</tt> shall not overlap otherwise.
- *
- *  The following code snippet demonstrates how to use \p transform_exclusive_scan using the
- *  \p thrust::host execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/transform_scan.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  
- *  int data[6] = {1, 0, 2, 2, 1, 3};
- *
- *  thrust::negate<int> unary_op;
- *  thrust::plus<int> binary_op;
- *
- *  thrust::transform_exclusive_scan(thrust::host, data, data + 6, data, unary_op, 4, binary_op); // in-place scan
- *
- *  // data is now {4, 3, 3, 1, -1, -2}
- *  \endcode
- *
- *  \see \p transform
- *  \see \p exclusive_scan
- *
- */
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename UnaryFunction,
-         typename T,
-         typename AssociativeOperator>
-  OutputIterator transform_exclusive_scan(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                          InputIterator first,
-                                          InputIterator last,
-                                          OutputIterator result,
-                                          UnaryFunction unary_op,
-                                          T init,
-                                          AssociativeOperator binary_op);
-
-
-/*! \p transform_exclusive_scan fuses the \p transform and \p exclusive_scan
- *  operations.  \p transform_exclusive_scan is equivalent to performing a
- *  transformation defined by \p unary_op into a temporary sequence and then
- *  performing an \p exclusive_scan on the transformed sequence.  In most
- *  cases, fusing these two operations together is more efficient, since
- *  fewer memory reads and writes are required. In 
- *  \p transform_exclusive_scan, \p init is assigned to <tt>\*result</tt> 
- *  and the result of <tt>binary_op(init, unary_op(\*first))</tt> is assigned
- *  to <tt>\*(result + 1)</tt>, and so on.  The transform scan operation is 
- *  permitted to be in-place.
- *
- *  \param first The beginning of the input sequence.
- *  \param last The end of the input sequence.
- *  \param result The beginning of the output sequence.
- *  \param unary_op The function used to transform the input sequence.
- *  \param init The initial value of the \p exclusive_scan.
- *  \param binary_op The associative operator used to 'sum' transformed values.
- *  \return The end of the output sequence.
- *
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>
- *                               and \c InputIterator's \c value_type is convertible to \c unary_op's input type.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a>.
- *  \tparam UnaryFunction is a model of <a href="http://www.sgi.com/tech/stl/UnaryFunction.html">Unary Function</a>
- *                               and accepts inputs of \c InputIterator's \c value_type.  \c UnaryFunction's result_type
- *                               is convertible to \c OutputIterator's \c value_type.
- *  \tparam T is convertible to \c OutputIterator's \c value_type.
- *  \tparam AssociativeOperator is a model of <a href="http://www.sgi.com/tech/stl/BinaryFunction.html">Binary Function</a>
- *                              and \c AssociativeOperator's \c result_type is
- *                              convertible to \c OutputIterator's \c value_type.
- *
- *  \pre \p first may equal \p result, but the range <tt>[first, last)</tt> and the range <tt>[result, result + (last - first))</tt> shall not overlap otherwise.
- *
- *  The following code snippet demonstrates how to use \p transform_exclusive_scan
- *
- *  \code
- *  #include <thrust/transform_scan.h>
- *  
- *  int data[6] = {1, 0, 2, 2, 1, 3};
- *
- *  thrust::negate<int> unary_op;
- *  thrust::plus<int> binary_op;
- *
- *  thrust::transform_exclusive_scan(data, data + 6, data, unary_op, 4, binary_op); // in-place scan
- *
- *  // data is now {4, 3, 3, 1, -1, -2}
- *  \endcode
- *
- *  \see \p transform
- *  \see \p exclusive_scan
- *
- */
-template<typename InputIterator,
-         typename OutputIterator,
-         typename UnaryFunction,
-         typename T,
-         typename AssociativeOperator>
-  OutputIterator transform_exclusive_scan(InputIterator first,
-                                          InputIterator last,
-                                          OutputIterator result,
-                                          UnaryFunction unary_op,
-                                          T init,
-                                          AssociativeOperator binary_op);
-
-
-/*! \} // end transformed_prefixsums
- */
-
-
-/*! \} // end prefixsums
- */
-
-	
-} // end namespace thrust
-
-#include <thrust/detail/transform_scan.inl>
-
diff --git a/compat/thrust/tuple.h b/compat/thrust/tuple.h
deleted file mode 100644
index 3961d982fa..0000000000
--- a/compat/thrust/tuple.h
+++ /dev/null
@@ -1,583 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file tuple.h
- *  \brief A type encapsulating a heterogeneous collection of elements
- */
-
-/*
- * Copyright (C) 1999, 2000 Jaakko Järvi (jaakko.jarvi@cs.utu.fi)
- * 
- * Distributed under the Boost Software License, Version 1.0.
- * (See accompanying NOTICE file for the complete license)
- *
- * For more information, see http://www.boost.org
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/tuple.inl>
-#include <thrust/pair.h>
-
-namespace thrust
-{
-
-/*! \addtogroup utility
- *  \{
- */
-
-/*! \addtogroup tuple
- *  \{
- */
-
-/*! \cond
- */
-
-struct null_type;
-
-/*! \endcond
- */
-
-/*! This metafunction returns the type of a
- *  \p tuple's <tt>N</tt>th element.
- *
- *  \tparam N This parameter selects the element of interest.
- *  \tparam T A \c tuple type of interest.
- *
- *  \see pair
- *  \see tuple
- */
-template<int N, class T>
-  struct tuple_element
-{
-  private:
-    typedef typename T::tail_type Next;
-
-  public:
-    /*! The result of this metafunction is returned in \c type.
-     */
-    typedef typename tuple_element<N-1, Next>::type type;
-}; // end tuple_element
-
-/*! This metafunction returns the number of elements
- *  of a \p tuple type of interest.
- *
- *  \tparam T A \c tuple type of interest.
- *
- *  \see pair
- *  \see tuple
- */
-template<class T>
-  struct tuple_size
-{
-  /*! The result of this metafunction is returned in \c value.
-   */
-  static const int value = 1 + tuple_size<typename T::tail_type>::value;
-}; // end tuple_size
-
-// get function for non-const cons-lists, returns a reference to the element
-
-/*! The \p get function returns a reference to a \p tuple element of
- *  interest.
- *
- *  \param t A reference to a \p tuple of interest.
- *  \return A reference to \p t's <tt>N</tt>th element.
- *
- *  \tparam N The index of the element of interest.
- *
- *  The following code snippet demonstrates how to use \p get to print
- *  the value of a \p tuple element.
- *
- *  \code
- *  #include <thrust/tuple.h>
- *  #include <iostream>
- *  ...
- *  thrust::tuple<int, const char *> t(13, "thrust");
- *
- *  std::cout << "The 1st value of t is " << thrust::get<1>(t) << std::endl;
- *  \endcode
- *
- *  \see pair
- *  \see tuple
- */
-template<int N, class HT, class TT>
-__host__ __device__
-inline typename access_traits<
-                  typename tuple_element<N, detail::cons<HT, TT> >::type
-                >::non_const_type
-get(detail::cons<HT, TT>& t);
-
-
-/*! The \p get function returns a \c const reference to a \p tuple element of
- *  interest.
- *
- *  \param t A reference to a \p tuple of interest.
- *  \return A \c const reference to \p t's <tt>N</tt>th element.
- *
- *  \tparam N The index of the element of interest.
- *
- *  The following code snippet demonstrates how to use \p get to print
- *  the value of a \p tuple element.
- *
- *  \code
- *  #include <thrust/tuple.h>
- *  #include <iostream>
- *  ...
- *  thrust::tuple<int, const char *> t(13, "thrust");
- *
- *  std::cout << "The 1st value of t is " << thrust::get<1>(t) << std::endl;
- *  \endcode
- *
- *  \see pair
- *  \see tuple
- */
-template<int N, class HT, class TT>
-__host__ __device__
-inline typename access_traits<
-                  typename tuple_element<N, detail::cons<HT, TT> >::type
-                >::const_type
-get(const detail::cons<HT, TT>& t);
-
-
-
-/*! \p tuple is a class template that can be instantiated with up to ten arguments.
- *  Each template argument specifies the type of element in the \p tuple.
- *  Consequently, tuples are heterogeneous, fixed-size collections of values. An
- *  instantiation of \p tuple with two arguments is similar to an instantiation
- *  of \p pair with the same two arguments. Individual elements of a \p tuple may
- *  be accessed with the \p get function.
- *
- *  \tparam TN The type of the <tt>N</tt> \c tuple element. Thrust's \p tuple
- *          type currently supports up to ten elements.
- *
- *  The following code snippet demonstrates how to create a new \p tuple object
- *  and inspect and modify the value of its elements.
- *
- *  \code
- *  #include <thrust/tuple.h>
- *  #include <iostream>
- *  ...
- *  // create a tuple containing an int, a float, and a string
- *  thrust::tuple<int, float, const char*> t(13, 0.1f, "thrust");
- *
- *  // individual members are accessed with the free function get
- *  std::cout << "The first element's value is " << thrust::get<0>(t) << std::endl; 
- *
- *  // or the member function get
- *  std::cout << "The second element's value is " << t.get<1>() << std::endl;
- *
- *  // we can also modify elements with the same function
- *  thrust::get<0>(t) += 10;
- *  \endcode
- *
- *  \see pair
- *  \see get
- *  \see make_tuple
- *  \see tuple_element
- *  \see tuple_size
- *  \see tie
- */
-template <class T0, class T1, class T2, class T3, class T4,
-          class T5, class T6, class T7, class T8, class T9>
-  class tuple :
-    public detail::map_tuple_to_cons<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9>::type
-{
-  /*! \cond
-   */
-
-  private:
-  typedef typename detail::map_tuple_to_cons<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9>::type inherited;
-
-  /*! \endcond
-   */
-
-  public:
-  /*! \p tuple's no-argument constructor initializes each element.
-   */
-  inline __host__ __device__
-  tuple(void) {}
-
-  /*! \p tuple's one-argument constructor copy constructs the first element from the given parameter
-   *     and intializes all other elements.
-   *  \param t0 The value to assign to this \p tuple's first element.
-   */
-  inline __host__ __device__ 
-  tuple(typename access_traits<T0>::parameter_type t0)
-    : inherited(t0,
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type())) {}
-
-  /*! \p tuple's one-argument constructor copy constructs the first two elements from the given parameters
-   *     and intializes all other elements.
-   *  \param t0 The value to assign to this \p tuple's first element.
-   *  \param t1 The value to assign to this \p tuple's second element.
-   *  \note \p tuple's constructor has ten variants of this form, the rest of which are ommitted here for brevity.
-   */
-  inline __host__ __device__ 
-  tuple(typename access_traits<T0>::parameter_type t0,
-        typename access_traits<T1>::parameter_type t1)
-    : inherited(t0, t1,
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type())) {}
-
-  /*! \cond
-   */
-
-  inline __host__ __device__ 
-  tuple(typename access_traits<T0>::parameter_type t0,
-        typename access_traits<T1>::parameter_type t1,
-        typename access_traits<T2>::parameter_type t2)
-    : inherited(t0, t1, t2,
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type())) {}
-
-  inline __host__ __device__ 
-  tuple(typename access_traits<T0>::parameter_type t0,
-        typename access_traits<T1>::parameter_type t1,
-        typename access_traits<T2>::parameter_type t2,
-        typename access_traits<T3>::parameter_type t3)
-    : inherited(t0, t1, t2, t3,
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type())) {}
-
-  inline __host__ __device__ 
-  tuple(typename access_traits<T0>::parameter_type t0,
-        typename access_traits<T1>::parameter_type t1,
-        typename access_traits<T2>::parameter_type t2,
-        typename access_traits<T3>::parameter_type t3,
-        typename access_traits<T4>::parameter_type t4)
-    : inherited(t0, t1, t2, t3, t4,
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type())) {}
-
-  inline __host__ __device__ 
-  tuple(typename access_traits<T0>::parameter_type t0,
-        typename access_traits<T1>::parameter_type t1,
-        typename access_traits<T2>::parameter_type t2,
-        typename access_traits<T3>::parameter_type t3,
-        typename access_traits<T4>::parameter_type t4,
-        typename access_traits<T5>::parameter_type t5)
-    : inherited(t0, t1, t2, t3, t4, t5,
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type())) {}
-
-  inline __host__ __device__ 
-  tuple(typename access_traits<T0>::parameter_type t0,
-        typename access_traits<T1>::parameter_type t1,
-        typename access_traits<T2>::parameter_type t2,
-        typename access_traits<T3>::parameter_type t3,
-        typename access_traits<T4>::parameter_type t4,
-        typename access_traits<T5>::parameter_type t5,
-        typename access_traits<T6>::parameter_type t6)
-    : inherited(t0, t1, t2, t3, t4, t5, t6,
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type())) {}
-
-  inline __host__ __device__ 
-  tuple(typename access_traits<T0>::parameter_type t0,
-        typename access_traits<T1>::parameter_type t1,
-        typename access_traits<T2>::parameter_type t2,
-        typename access_traits<T3>::parameter_type t3,
-        typename access_traits<T4>::parameter_type t4,
-        typename access_traits<T5>::parameter_type t5,
-        typename access_traits<T6>::parameter_type t6,
-        typename access_traits<T7>::parameter_type t7)
-    : inherited(t0, t1, t2, t3, t4, t5, t6, t7,
-                static_cast<const null_type&>(null_type()),
-                static_cast<const null_type&>(null_type())) {}
-
-  inline __host__ __device__ 
-  tuple(typename access_traits<T0>::parameter_type t0,
-        typename access_traits<T1>::parameter_type t1,
-        typename access_traits<T2>::parameter_type t2,
-        typename access_traits<T3>::parameter_type t3,
-        typename access_traits<T4>::parameter_type t4,
-        typename access_traits<T5>::parameter_type t5,
-        typename access_traits<T6>::parameter_type t6,
-        typename access_traits<T7>::parameter_type t7,
-        typename access_traits<T8>::parameter_type t8)
-    : inherited(t0, t1, t2, t3, t4, t5, t6, t7, t8,
-                static_cast<const null_type&>(null_type())) {}
-
-  inline __host__ __device__ 
-  tuple(typename access_traits<T0>::parameter_type t0,
-        typename access_traits<T1>::parameter_type t1,
-        typename access_traits<T2>::parameter_type t2,
-        typename access_traits<T3>::parameter_type t3,
-        typename access_traits<T4>::parameter_type t4,
-        typename access_traits<T5>::parameter_type t5,
-        typename access_traits<T6>::parameter_type t6,
-        typename access_traits<T7>::parameter_type t7,
-        typename access_traits<T8>::parameter_type t8,
-        typename access_traits<T9>::parameter_type t9)
-    : inherited(t0, t1, t2, t3, t4, t5, t6, t7, t8, t9) {}
-
-
-  template<class U1, class U2>
-  inline __host__ __device__ 
-  tuple(const detail::cons<U1, U2>& p) : inherited(p) {}
-
-  template <class U1, class U2>
-  inline __host__ __device__ 
-  tuple& operator=(const detail::cons<U1, U2>& k)
-  {
-    inherited::operator=(k);
-    return *this;
-  }
-
-  /*! \endcond
-   */
-
-  /*! This assignment operator allows assigning the first two elements of this \p tuple from a \p pair.
-   *  \param k A \p pair to assign from.
-   */
-  template <class U1, class U2>
-  __host__ __device__ inline
-  tuple& operator=(const thrust::pair<U1, U2>& k) {
-    //BOOST_STATIC_ASSERT(length<tuple>::value == 2);// check_length = 2
-    this->head = k.first;
-    this->tail.head = k.second;
-    return *this;
-  }
-
-  /*! \p swap swaps the elements of two <tt>tuple</tt>s.
-   *
-   *  \param t The other <tt>tuple</tt> with which to swap.
-   */
-  inline __host__ __device__
-  void swap(tuple &t)
-  {
-    inherited::swap(t);
-  }
-};
-
-/*! \cond
- */
-
-template <>
-class tuple<null_type, null_type, null_type, null_type, null_type, null_type, null_type, null_type, null_type, null_type>  :
-  public null_type
-{
-public:
-  typedef null_type inherited;
-};
-
-/*! \endcond
- */
-
-
-/*! This version of \p make_tuple creates a new \c tuple object from a
- *  single object.
- *
- *  \param t0 The object to copy from.
- *  \return A \p tuple object with a single member which is a copy of \p t0.
- */
-template<class T0>
-__host__ __device__ inline
-  typename detail::make_tuple_mapper<T0>::type
-    make_tuple(const T0& t0);
-
-/*! This version of \p make_tuple creates a new \c tuple object from two
- *  objects.
- *
- *  \param t0 The first object to copy from.
- *  \param t1 The second object to copy from.
- *  \return A \p tuple object with two members which are copies of \p t0
- *          and \p t1.
- *
- *  \note \p make_tuple has ten variants, the rest of which are omitted here
- *        for brevity.
- */
-template<class T0, class T1>
-__host__ __device__ inline
-  typename detail::make_tuple_mapper<T0, T1>::type
-    make_tuple(const T0& t0, const T1& t1);
-
-/*! This version of \p tie creates a new \c tuple whose single element is
- *  a reference which refers to this function's argument.
- *
- *  \param t0 The object to reference.
- *  \return A \p tuple object with one member which is a reference to \p t0.
- */
-template<typename T0>
-__host__ __device__ inline
-tuple<T0&> tie(T0& t0);
-
-/*! This version of \p tie creates a new \c tuple of references object which
- *  refers to this function's arguments.
- *
- *  \param t0 The first object to reference.
- *  \param t1 The second object to reference.
- *  \return A \p tuple object with two members which are references to \p t0
- *          and \p t1.
- *
- *  \note \p tie has ten variants, the rest of which are omitted here for
- *           brevity.
- */
-template<typename T0, typename T1>
-__host__ __device__ inline
-tuple<T0&,T1&> tie(T0& t0, T1& t1);
-
-/*! \p swap swaps the contents of two <tt>tuple</tt>s.
- *
- *  \param x The first \p tuple to swap.
- *  \param y The second \p tuple to swap.
- */
-template<
-  typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8, typename T9,
-  typename U0, typename U1, typename U2, typename U3, typename U4, typename U5, typename U6, typename U7, typename U8, typename U9
->
-inline __host__ __device__
-void swap(tuple<T0,T1,T2,T3,T4,T5,T6,T7,T8,T9> &x,
-          tuple<U0,U1,U2,U3,U4,U5,U6,U7,U8,U9> &y);
-
-
-
-/*! \cond
- */
-
-template<class T0, class T1, class T2>
-__host__ __device__ inline
-  typename detail::make_tuple_mapper<T0, T1, T2>::type
-    make_tuple(const T0& t0, const T1& t1, const T2& t2);
-
-template<class T0, class T1, class T2, class T3>
-__host__ __device__ inline
-  typename detail::make_tuple_mapper<T0, T1, T2, T3>::type
-    make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3);
-
-template<class T0, class T1, class T2, class T3, class T4>
-__host__ __device__ inline
-  typename detail::make_tuple_mapper<T0, T1, T2, T3, T4>::type
-    make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4);
-
-template<class T0, class T1, class T2, class T3, class T4, class T5>
-__host__ __device__ inline
-  typename detail::make_tuple_mapper<T0, T1, T2, T3, T4, T5>::type
-    make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4, const T5& t5);
-
-template<class T0, class T1, class T2, class T3, class T4, class T5, class T6>
-__host__ __device__ inline
-  typename detail::make_tuple_mapper<T0, T1, T2, T3, T4, T5, T6>::type
-    make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4, const T5& t5, const T6& t6);
-
-template<class T0, class T1, class T2, class T3, class T4, class T5, class T6, class T7>
-__host__ __device__ inline
-  typename detail::make_tuple_mapper<T0, T1, T2, T3, T4, T5, T6, T7>::type
-    make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4, const T5& t5, const T6& t6, const T7& t7);
-
-template<class T0, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8>
-__host__ __device__ inline
-  typename detail::make_tuple_mapper<T0, T1, T2, T3, T4, T5, T6, T7, T8>::type
-    make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4, const T5& t5, const T6& t6, const T7& t7, const T8& t8);
-
-template<class T0, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9>
-__host__ __device__ inline
-  typename detail::make_tuple_mapper<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9>::type
-    make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4, const T5& t5, const T6& t6, const T7& t7, const T8& t8, const T9& t9);
-
-template<typename T0, typename T1, typename T2>
-__host__ __device__ inline
-tuple<T0&,T1&,T2&> tie(T0 &t0, T1 &t1, T2 &t2);
-
-template<typename T0, typename T1, typename T2, typename T3>
-__host__ __device__ inline
-tuple<T0&,T1&,T2&,T3&> tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3);
-
-template<typename T0, typename T1, typename T2, typename T3, typename T4>
-__host__ __device__ inline
-tuple<T0&,T1&,T2&,T3&,T4&> tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3, T4 &t4);
-
-template<typename T0, typename T1, typename T2, typename T3, typename T4, typename T5>
-__host__ __device__ inline
-tuple<T0&,T1&,T2&,T3&,T4&,T5&> tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3, T4 &t4, T5 &t5);
-
-template<typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6>
-__host__ __device__ inline
-tuple<T0&,T1&,T2&,T3&,T4&,T5&,T6&> tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3, T4 &t4, T5 &t5, T6 &t6);
-
-template<typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7>
-__host__ __device__ inline
-tuple<T0&,T1&,T2&,T3&,T4&,T5&,T6&,T7&> tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3, T4 &t4, T5 &t5, T6 &t6, T7 &t7);
-
-template<typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8>
-__host__ __device__ inline
-tuple<T0&,T1&,T2&,T3&,T4&,T5&,T6&,T7&,T8&> tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3, T4 &t4, T5 &t5, T6 &t6, T7 &t7, T8 &t8);
-
-template<typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8, typename T9>
-__host__ __device__ inline
-tuple<T0&,T1&,T2&,T3&,T4&,T5&,T6&,T7&,T8&,T9&> tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3, T4 &t4, T5 &t5, T6 &t6, T7 &t7, T8 &t8, T9 &t9);
-
-
-__host__ __device__ inline
-bool operator==(const null_type&, const null_type&);
-
-__host__ __device__ inline
-bool operator>=(const null_type&, const null_type&);
-
-__host__ __device__ inline
-bool operator<=(const null_type&, const null_type&);
-
-__host__ __device__ inline
-bool operator!=(const null_type&, const null_type&);
-
-__host__ __device__ inline
-bool operator<(const null_type&, const null_type&);
-
-__host__ __device__ inline
-bool operator>(const null_type&, const null_type&);
-
-/*! \endcond
- */
-
-/*! \} // tuple
- */
-
-/*! \} // utility
- */
-
-} // end thrust
-
diff --git a/compat/thrust/uninitialized_copy.h b/compat/thrust/uninitialized_copy.h
deleted file mode 100644
index 77b673c425..0000000000
--- a/compat/thrust/uninitialized_copy.h
+++ /dev/null
@@ -1,301 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file uninitialized_copy.h
- *  \brief Copy construction into a range of uninitialized elements from a source range
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/execution_policy.h>
-
-namespace thrust
-{
-
-
-/*! \addtogroup copying
- *  \{
- */
-
-
-/*! In \c thrust, the function \c thrust::device_new allocates memory for
- *  an object and then creates an object at that location by calling a constructor.
- *  Occasionally, however, it is useful to separate those two operations.
- *  If each iterator in the range <tt>[result, result + (last - first))</tt> points
- *  to uninitialized memory, then \p uninitialized_copy creates a copy of
- *  <tt>[first, last)</tt> in that range. That is, for each iterator \c i in
- *  the input, \p uninitialized_copy creates a copy of \c *i in the location pointed
- *  to by the corresponding iterator in the output range by \p ForwardIterator's
- *  \c value_type's copy constructor with *i as its argument.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The first element of the input range to copy from.
- *  \param last The last element of the input range to copy from.
- *  \param result The first element of the output range to copy to.
- *  \return An iterator pointing to the last element of the output range.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          \p ForwardIterator is mutable, and \p ForwardIterator's \c value_type has a constructor that takes
- *          a single argument whose type is \p InputIterator's \c value_type.
- *
- *  \pre \p first may equal \p result, but the range <tt>[first, last)</tt> and the range <tt>[result, result + (last - first))</tt> shall not overlap otherwise.
- *
- *  The following code snippet demonstrates how to use \p uninitialized_copy to initialize
- *  a range of uninitialized memory using the \p thrust::device execution policy for
- *  parallelization:
- *
- *  \code
- *  #include <thrust/uninitialized_copy.h>
- *  #include <thrust/device_malloc.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/execution_policy.h>
- *  
- *  struct Int
- *  {
- *    __host__ __device__
- *    Int(int x) : val(x) {}
- *    int val;
- *  };  
- *  ...
- *  const int N = 137;
- *
- *  Int val(46);
- *  thrust::device_vector<Int> input(N, val);
- *  thrust::device_ptr<Int> array = thrust::device_malloc<Int>(N);
- *  thrust::uninitialized_copy(thrust::device, input.begin(), input.end(), array);
- *
- *  // Int x = array[i];
- *  // x.val == 46 for all 0 <= i < N
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/uninitialized_copy.html
- *  \see \c copy
- *  \see \c uninitialized_fill
- *  \see \c device_new
- *  \see \c device_malloc
- */
-template<typename DerivedPolicy, typename InputIterator, typename ForwardIterator>
-  ForwardIterator uninitialized_copy(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                     InputIterator first,
-                                     InputIterator last,
-                                     ForwardIterator result);
-
-
-/*! In \c thrust, the function \c thrust::device_new allocates memory for
- *  an object and then creates an object at that location by calling a constructor.
- *  Occasionally, however, it is useful to separate those two operations.
- *  If each iterator in the range <tt>[result, result + (last - first))</tt> points
- *  to uninitialized memory, then \p uninitialized_copy creates a copy of
- *  <tt>[first, last)</tt> in that range. That is, for each iterator \c i in
- *  the input, \p uninitialized_copy creates a copy of \c *i in the location pointed
- *  to by the corresponding iterator in the output range by \p ForwardIterator's
- *  \c value_type's copy constructor with *i as its argument.
- *
- *  \param first The first element of the input range to copy from.
- *  \param last The last element of the input range to copy from.
- *  \param result The first element of the output range to copy to.
- *  \return An iterator pointing to the last element of the output range.
- *
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          \p ForwardIterator is mutable, and \p ForwardIterator's \c value_type has a constructor that takes
- *          a single argument whose type is \p InputIterator's \c value_type.
- *
- *  \pre \p first may equal \p result, but the range <tt>[first, last)</tt> and the range <tt>[result, result + (last - first))</tt> shall not overlap otherwise.
- *
- *  The following code snippet demonstrates how to use \p uninitialized_copy to initialize
- *  a range of uninitialized memory.
- *
- *  \code
- *  #include <thrust/uninitialized_copy.h>
- *  #include <thrust/device_malloc.h>
- *  #include <thrust/device_vector.h>
- *  
- *  struct Int
- *  {
- *    __host__ __device__
- *    Int(int x) : val(x) {}
- *    int val;
- *  };  
- *  ...
- *  const int N = 137;
- *
- *  Int val(46);
- *  thrust::device_vector<Int> input(N, val);
- *  thrust::device_ptr<Int> array = thrust::device_malloc<Int>(N);
- *  thrust::uninitialized_copy(input.begin(), input.end(), array);
- *
- *  // Int x = array[i];
- *  // x.val == 46 for all 0 <= i < N
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/uninitialized_copy.html
- *  \see \c copy
- *  \see \c uninitialized_fill
- *  \see \c device_new
- *  \see \c device_malloc
- */
-template<typename InputIterator, typename ForwardIterator>
-  ForwardIterator uninitialized_copy(InputIterator first,
-                                     InputIterator last,
-                                     ForwardIterator result);
-
-
-/*! In \c thrust, the function \c thrust::device_new allocates memory for
- *  an object and then creates an object at that location by calling a constructor.
- *  Occasionally, however, it is useful to separate those two operations.
- *  If each iterator in the range <tt>[result, result + n)</tt> points
- *  to uninitialized memory, then \p uninitialized_copy_n creates a copy of
- *  <tt>[first, first + n)</tt> in that range. That is, for each iterator \c i in
- *  the input, \p uninitialized_copy_n creates a copy of \c *i in the location pointed
- *  to by the corresponding iterator in the output range by \p InputIterator's
- *  \c value_type's copy constructor with *i as its argument.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The first element of the input range to copy from.
- *  \param n The number of elements to copy.
- *  \param result The first element of the output range to copy to.
- *  \return An iterator pointing to the last element of the output range.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>.
- *  \tparam Size is an integral type.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          \p ForwardIterator is mutable, and \p ForwardIterator's \c value_type has a constructor that takes
- *          a single argument whose type is \p InputIterator's \c value_type.
- *
- *  \pre \p first may equal \p result, but the range <tt>[first, first + n)</tt> and the range <tt>[result, result + n)</tt> shall not overlap otherwise.
- *
- *  The following code snippet demonstrates how to use \p uninitialized_copy to initialize
- *  a range of uninitialized memory using the \p thrust::device execution policy for
- *  parallelization:
- *
- *  \code
- *  #include <thrust/uninitialized_copy.h>
- *  #include <thrust/device_malloc.h>
- *  #include <thrust/device_vector.h>
- *  #include <thrust/execution_policy.h>
- *  
- *  struct Int
- *  {
- *    __host__ __device__
- *    Int(int x) : val(x) {}
- *    int val;
- *  };  
- *  ...
- *  const int N = 137;
- *
- *  Int val(46);
- *  thrust::device_vector<Int> input(N, val);
- *  thrust::device_ptr<Int> array = thrust::device_malloc<Int>(N);
- *  thrust::uninitialized_copy_n(thrust::device, input.begin(), N, array);
- *
- *  // Int x = array[i];
- *  // x.val == 46 for all 0 <= i < N
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/uninitialized_copy.html
- *  \see \c uninitialized_copy
- *  \see \c copy
- *  \see \c uninitialized_fill
- *  \see \c device_new
- *  \see \c device_malloc
- */
-template<typename DerivedPolicy, typename InputIterator, typename Size, typename ForwardIterator>
-  ForwardIterator uninitialized_copy_n(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                       InputIterator first,
-                                       Size n,
-                                       ForwardIterator result);
-
-
-/*! In \c thrust, the function \c thrust::device_new allocates memory for
- *  an object and then creates an object at that location by calling a constructor.
- *  Occasionally, however, it is useful to separate those two operations.
- *  If each iterator in the range <tt>[result, result + n)</tt> points
- *  to uninitialized memory, then \p uninitialized_copy_n creates a copy of
- *  <tt>[first, first + n)</tt> in that range. That is, for each iterator \c i in
- *  the input, \p uninitialized_copy_n creates a copy of \c *i in the location pointed
- *  to by the corresponding iterator in the output range by \p InputIterator's
- *  \c value_type's copy constructor with *i as its argument.
- *
- *  \param first The first element of the input range to copy from.
- *  \param n The number of elements to copy.
- *  \param result The first element of the output range to copy to.
- *  \return An iterator pointing to the last element of the output range.
- *
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>.
- *  \tparam Size is an integral type.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          \p ForwardIterator is mutable, and \p ForwardIterator's \c value_type has a constructor that takes
- *          a single argument whose type is \p InputIterator's \c value_type.
- *
- *  \pre \p first may equal \p result, but the range <tt>[first, first + n)</tt> and the range <tt>[result, result + n)</tt> shall not overlap otherwise.
- *
- *  The following code snippet demonstrates how to use \p uninitialized_copy to initialize
- *  a range of uninitialized memory.
- *
- *  \code
- *  #include <thrust/uninitialized_copy.h>
- *  #include <thrust/device_malloc.h>
- *  #include <thrust/device_vector.h>
- *  
- *  struct Int
- *  {
- *    __host__ __device__
- *    Int(int x) : val(x) {}
- *    int val;
- *  };  
- *  ...
- *  const int N = 137;
- *
- *  Int val(46);
- *  thrust::device_vector<Int> input(N, val);
- *  thrust::device_ptr<Int> array = thrust::device_malloc<Int>(N);
- *  thrust::uninitialized_copy_n(input.begin(), N, array);
- *
- *  // Int x = array[i];
- *  // x.val == 46 for all 0 <= i < N
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/uninitialized_copy.html
- *  \see \c uninitialized_copy
- *  \see \c copy
- *  \see \c uninitialized_fill
- *  \see \c device_new
- *  \see \c device_malloc
- */
-template<typename InputIterator, typename Size, typename ForwardIterator>
-  ForwardIterator uninitialized_copy_n(InputIterator first,
-                                       Size n,
-                                       ForwardIterator result);
-
-
-/*! \} // copying
- */
-
-
-} // end thrust
-
-#include <thrust/detail/uninitialized_copy.inl>
-
diff --git a/compat/thrust/uninitialized_fill.h b/compat/thrust/uninitialized_fill.h
deleted file mode 100644
index c726241ddd..0000000000
--- a/compat/thrust/uninitialized_fill.h
+++ /dev/null
@@ -1,273 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file uninitialized_fill.h
- *  \brief Copy construction into a range of uninitialized elements from a source value
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/execution_policy.h>
-
-namespace thrust
-{
-
-
-/*! \addtogroup filling
- *  \ingroup transformations
- *  \{
- */
-
-
-/*! In \c thrust, the function \c thrust::device_new allocates memory for
- *  an object and then creates an object at that location by calling a
- *  constructor. Occasionally, however, it is useful to separate those two
- *  operations. If each iterator in the range <tt>[first, last)</tt> points
- *  to unitialized memory, then \p unitialized_fill creates copies of \c x
- *  in that range. That is, for each iterator \c i in the range <tt>[first, last)</tt>,
- *  \p uninitialized_fill creates a copy of \c x in the location pointed to \c i by
- *  calling \p ForwardIterator's \c value_type's copy constructor.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *  
- *  \param exec The execution policy to use for parallelization.
- *  \param first The first element of the range of interest.
- *  \param last The last element of the range of interest.
- *  \param x The value to use as the exemplar of the copy constructor.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator">Forward Iterator</a>,
- *          \p ForwardIterator is mutable, and \p ForwardIterator's \c value_type has a constructor that
- *          takes a single argument of type \p T.
- *
- *  The following code snippet demonstrates how to use \p uninitialized_fill to initialize a range of
- *  uninitialized memory using the \p thrust::device execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/uninitialized_fill.h>
- *  #include <thrust/device_malloc.h>
- *  #include <thrust/execution_policy.h>
- *  
- *  struct Int
- *  {
- *    __host__ __device__
- *    Int(int x) : val(x) {}
- *    int val;
- *  };  
- *  ...
- *  const int N = 137;
- *
- *  Int val(46);
- *  thrust::device_ptr<Int> array = thrust::device_malloc<Int>(N);
- *  thrust::uninitialized_fill(thrust::device, array, array + N, val);
- *
- *  // Int x = array[i];
- *  // x.val == 46 for all 0 <= i < N
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/uninitialized_fill.html
- *  \see \c uninitialized_fill_n
- *  \see \c fill
- *  \see \c uninitialized_copy
- *  \see \c device_new
- *  \see \c device_malloc
- */
-template<typename DerivedPolicy, typename ForwardIterator, typename T>
-  void uninitialized_fill(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                          ForwardIterator first,
-                          ForwardIterator last,
-                          const T &x);
-
-
-/*! In \c thrust, the function \c thrust::device_new allocates memory for
- *  an object and then creates an object at that location by calling a
- *  constructor. Occasionally, however, it is useful to separate those two
- *  operations. If each iterator in the range <tt>[first, last)</tt> points
- *  to unitialized memory, then \p unitialized_fill creates copies of \c x
- *  in that range. That is, for each iterator \c i in the range <tt>[first, last)</tt>,
- *  \p uninitialized_fill creates a copy of \c x in the location pointed to \c i by
- *  calling \p ForwardIterator's \c value_type's copy constructor.
- *  
- *  \param first The first element of the range of interest.
- *  \param last The last element of the range of interest.
- *  \param x The value to use as the exemplar of the copy constructor.
- *
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator">Forward Iterator</a>,
- *          \p ForwardIterator is mutable, and \p ForwardIterator's \c value_type has a constructor that
- *          takes a single argument of type \p T.
- *
- *  The following code snippet demonstrates how to use \p uninitialized_fill to initialize a range of
- *  uninitialized memory.
- *
- *  \code
- *  #include <thrust/uninitialized_fill.h>
- *  #include <thrust/device_malloc.h>
- *  
- *  struct Int
- *  {
- *    __host__ __device__
- *    Int(int x) : val(x) {}
- *    int val;
- *  };  
- *  ...
- *  const int N = 137;
- *
- *  Int val(46);
- *  thrust::device_ptr<Int> array = thrust::device_malloc<Int>(N);
- *  thrust::uninitialized_fill(array, array + N, val);
- *
- *  // Int x = array[i];
- *  // x.val == 46 for all 0 <= i < N
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/uninitialized_fill.html
- *  \see \c uninitialized_fill_n
- *  \see \c fill
- *  \see \c uninitialized_copy
- *  \see \c device_new
- *  \see \c device_malloc
- */
-template<typename ForwardIterator, typename T>
-  void uninitialized_fill(ForwardIterator first,
-                          ForwardIterator last,
-                          const T &x);
-
-
-/*! In \c thrust, the function \c thrust::device_new allocates memory for
- *  an object and then creates an object at that location by calling a
- *  constructor. Occasionally, however, it is useful to separate those two
- *  operations. If each iterator in the range <tt>[first, first+n)</tt> points
- *  to unitialized memory, then \p unitialized_fill creates copies of \c x
- *  in that range. That is, for each iterator \c i in the range <tt>[first, first+n)</tt>,
- *  \p uninitialized_fill creates a copy of \c x in the location pointed to \c i by
- *  calling \p ForwardIterator's \c value_type's copy constructor.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *  
- *  \param exec The execution policy to use for parallelization.
- *  \param first The first element of the range of interest.
- *  \param n The size of the range of interest.
- *  \param x The value to use as the exemplar of the copy constructor.
- *  \return <tt>first+n</tt>
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator">Forward Iterator</a>,
- *          \p ForwardIterator is mutable, and \p ForwardIterator's \c value_type has a constructor that
- *          takes a single argument of type \p T.
- *
- *  The following code snippet demonstrates how to use \p uninitialized_fill to initialize a range of
- *  uninitialized memory using the \p thrust::device execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/uninitialized_fill.h>
- *  #include <thrust/device_malloc.h>
- *  #include <thrust/execution_policy.h>
- *  
- *  struct Int
- *  {
- *    __host__ __device__
- *    Int(int x) : val(x) {}
- *    int val;
- *  };  
- *  ...
- *  const int N = 137;
- *
- *  Int val(46);
- *  thrust::device_ptr<Int> array = thrust::device_malloc<Int>(N);
- *  thrust::uninitialized_fill_n(thrust::device, array, N, val);
- *
- *  // Int x = array[i];
- *  // x.val == 46 for all 0 <= i < N
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/uninitialized_fill.html
- *  \see \c uninitialized_fill
- *  \see \c fill
- *  \see \c uninitialized_copy_n
- *  \see \c device_new
- *  \see \c device_malloc
- */
-template<typename DerivedPolicy, typename ForwardIterator, typename Size, typename T>
-  ForwardIterator uninitialized_fill_n(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                                       ForwardIterator first,
-                                       Size n,
-                                       const T &x);
-
-
-/*! In \c thrust, the function \c thrust::device_new allocates memory for
- *  an object and then creates an object at that location by calling a
- *  constructor. Occasionally, however, it is useful to separate those two
- *  operations. If each iterator in the range <tt>[first, first+n)</tt> points
- *  to unitialized memory, then \p unitialized_fill creates copies of \c x
- *  in that range. That is, for each iterator \c i in the range <tt>[first, first+n)</tt>,
- *  \p uninitialized_fill creates a copy of \c x in the location pointed to \c i by
- *  calling \p ForwardIterator's \c value_type's copy constructor.
- *  
- *  \param first The first element of the range of interest.
- *  \param n The size of the range of interest.
- *  \param x The value to use as the exemplar of the copy constructor.
- *  \return <tt>first+n</tt>
- *
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator">Forward Iterator</a>,
- *          \p ForwardIterator is mutable, and \p ForwardIterator's \c value_type has a constructor that
- *          takes a single argument of type \p T.
- *
- *  The following code snippet demonstrates how to use \p uninitialized_fill to initialize a range of
- *  uninitialized memory.
- *
- *  \code
- *  #include <thrust/uninitialized_fill.h>
- *  #include <thrust/device_malloc.h>
- *  
- *  struct Int
- *  {
- *    __host__ __device__
- *    Int(int x) : val(x) {}
- *    int val;
- *  };  
- *  ...
- *  const int N = 137;
- *
- *  Int val(46);
- *  thrust::device_ptr<Int> array = thrust::device_malloc<Int>(N);
- *  thrust::uninitialized_fill_n(array, N, val);
- *
- *  // Int x = array[i];
- *  // x.val == 46 for all 0 <= i < N
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/uninitialized_fill.html
- *  \see \c uninitialized_fill
- *  \see \c fill
- *  \see \c uninitialized_copy_n
- *  \see \c device_new
- *  \see \c device_malloc
- */
-template<typename ForwardIterator, typename Size, typename T>
-  ForwardIterator uninitialized_fill_n(ForwardIterator first,
-                                       Size n,
-                                       const T &x);
-
-/*! \} // end filling
- *  \} // transformations
- */
-
-} // end thrust
-
-#include <thrust/detail/uninitialized_fill.inl>
-
diff --git a/compat/thrust/unique.h b/compat/thrust/unique.h
deleted file mode 100644
index 98150f36c9..0000000000
--- a/compat/thrust/unique.h
+++ /dev/null
@@ -1,960 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-
-/*! \file unique.h
- *  \brief Move unique elements to the front of a range
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-#include <thrust/detail/execution_policy.h>
-#include <thrust/pair.h>
-
-namespace thrust
-{
-
-
-/*! \addtogroup stream_compaction
- *  \{
- */
-
-
-/*! For each group of consecutive elements in the range <tt>[first, last)</tt>
- *  with the same value, \p unique removes all but the first element of 
- *  the group. The return value is an iterator \c new_last such that 
- *  no two consecutive elements in the range <tt>[first, new_last)</tt> are
- *  equal. The iterators in the range <tt>[new_last, last)</tt> are all still
- *  dereferenceable, but the elements that they point to are unspecified.
- *  \p unique is stable, meaning that the relative order of elements that are
- *  not removed is unchanged.
- *
- *  This version of \p unique uses \c operator== to test for equality.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the input range.
- *  \param last  The end of the input range.
- *  \return The end of the unique range <tt>[first, new_last)</tt>.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \p ForwardIterator is mutable,
- *          and \p ForwardIterator's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/EqualityComparable.html">Equality Comparable</a>.
- *
- *  The following code snippet demonstrates how to use \p unique to
- *  compact a sequence of numbers to remove consecutive duplicates using the \p thrust::host execution policy
- *  for parallelization:
- *
- *  \code
- *  #include <thrust/unique.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  const int N = 7;
- *  int A[N] = {1, 3, 3, 3, 2, 2, 1};
- *  int *new_end = thrust::unique(thrust::host, A, A + N);
- *  // The first four values of A are now {1, 3, 2, 1}
- *  // Values beyond new_end are unspecified.
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/unique.html
- *  \see unique_copy
- */
-template<typename DerivedPolicy,
-         typename ForwardIterator>
-ForwardIterator unique(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                       ForwardIterator first,
-                       ForwardIterator last);
-
-
-/*! For each group of consecutive elements in the range <tt>[first, last)</tt>
- *  with the same value, \p unique removes all but the first element of 
- *  the group. The return value is an iterator \c new_last such that 
- *  no two consecutive elements in the range <tt>[first, new_last)</tt> are
- *  equal. The iterators in the range <tt>[new_last, last)</tt> are all still
- *  dereferenceable, but the elements that they point to are unspecified.
- *  \p unique is stable, meaning that the relative order of elements that are
- *  not removed is unchanged.
- *
- *  This version of \p unique uses \c operator== to test for equality.
- *
- *  \param first The beginning of the input range.
- *  \param last  The end of the input range.
- *  \return The end of the unique range <tt>[first, new_last)</tt>.
- *
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \p ForwardIterator is mutable,
- *          and \p ForwardIterator's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/EqualityComparable.html">Equality Comparable</a>.
- *
- *  The following code snippet demonstrates how to use \p unique to
- *  compact a sequence of numbers to remove consecutive duplicates.
- *
- *  \code
- *  #include <thrust/unique.h>
- *  ...
- *  const int N = 7;
- *  int A[N] = {1, 3, 3, 3, 2, 2, 1};
- *  int *new_end = thrust::unique(A, A + N);
- *  // The first four values of A are now {1, 3, 2, 1}
- *  // Values beyond new_end are unspecified.
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/unique.html
- *  \see unique_copy
- */
-template <typename ForwardIterator>
-ForwardIterator unique(ForwardIterator first,
-                       ForwardIterator last);
-
-
-/*! For each group of consecutive elements in the range <tt>[first, last)</tt>
- *  with the same value, \p unique removes all but the first element of 
- *  the group. The return value is an iterator \c new_last such that 
- *  no two consecutive elements in the range <tt>[first, new_last)</tt> are
- *  equal. The iterators in the range <tt>[new_last, last)</tt> are all still
- *  dereferenceable, but the elements that they point to are unspecified.
- *  \p unique is stable, meaning that the relative order of elements that are
- *  not removed is unchanged.
- *
- *  This version of \p unique uses the function object \p binary_pred to test
- *  for equality.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the input range.
- *  \param last  The end of the input range.
- *  \param binary_pred  The binary predicate used to determine equality.
- *  \return The end of the unique range <tt>[first, new_last)</tt>
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \p ForwardIterator is mutable,
- *          and \p ForwardIterator's \c value_type is convertible to \p BinaryPredicate's \c first_argument_type and to \p BinaryPredicate's \c second_argument_type.
- *  \tparam BinaryPredicate is a model of <a href="http://www.sgi.com/tech/stl/BinaryPredicate.html">Binary Predicate</a>.
- *
- *  The following code snippet demonstrates how to use \p unique to
- *  compact a sequence of numbers to remove consecutive duplicates using the \p thrust::host execution policy
- *  for parallelization:
- *
- *  \code
- *  #include <thrust/unique.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  const int N = 7;
- *  int A[N] = {1, 3, 3, 3, 2, 2, 1};
- *  int *new_end = thrust::unique(thrust::host, A, A + N, thrust::equal_to<int>());
- *  // The first four values of A are now {1, 3, 2, 1}
- *  // Values beyond new_end are unspecified.
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/unique.html
- *  \see unique_copy
- */
-template<typename DerivedPolicy,
-         typename ForwardIterator,
-         typename BinaryPredicate>
-ForwardIterator unique(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                       ForwardIterator first,
-                       ForwardIterator last,
-                       BinaryPredicate binary_pred);
-
-
-/*! For each group of consecutive elements in the range <tt>[first, last)</tt>
- *  with the same value, \p unique removes all but the first element of 
- *  the group. The return value is an iterator \c new_last such that 
- *  no two consecutive elements in the range <tt>[first, new_last)</tt> are
- *  equal. The iterators in the range <tt>[new_last, last)</tt> are all still
- *  dereferenceable, but the elements that they point to are unspecified.
- *  \p unique is stable, meaning that the relative order of elements that are
- *  not removed is unchanged.
- *
- *  This version of \p unique uses the function object \p binary_pred to test
- *  for equality.
- *
- *  \param first The beginning of the input range.
- *  \param last  The end of the input range.
- *  \param binary_pred  The binary predicate used to determine equality.
- *  \return The end of the unique range <tt>[first, new_last)</tt>
- *
- *  \tparam ForwardIterator is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \p ForwardIterator is mutable,
- *          and \p ForwardIterator's \c value_type is convertible to \p BinaryPredicate's \c first_argument_type and to \p BinaryPredicate's \c second_argument_type.
- *  \tparam BinaryPredicate is a model of <a href="http://www.sgi.com/tech/stl/BinaryPredicate.html">Binary Predicate</a>.
- *
- *  The following code snippet demonstrates how to use \p unique to
- *  compact a sequence of numbers to remove consecutive duplicates.
- *
- *  \code
- *  #include <thrust/unique.h>
- *  ...
- *  const int N = 7;
- *  int A[N] = {1, 3, 3, 3, 2, 2, 1};
- *  int *new_end = thrust::unique(A, A + N, thrust::equal_to<int>());
- *  // The first four values of A are now {1, 3, 2, 1}
- *  // Values beyond new_end are unspecified.
- *  \endcode
- *
- *  \see http://www.sgi.com/tech/stl/unique.html
- *  \see unique_copy
- */
-template <typename ForwardIterator,
-          typename BinaryPredicate>
-ForwardIterator unique(ForwardIterator first,
-                       ForwardIterator last,
-                       BinaryPredicate binary_pred);
-
-
-/*! \p unique_copy copies elements from the range <tt>[first, last)</tt>
- * to a range beginning with \p result, except that in a consecutive group
- * of duplicate elements only the first one is copied. The return value
- * is the end of the range to which the elements are copied. 
- *
- * The reason there are two different versions of unique_copy is that there
- * are two different definitions of what it means for a consecutive group of
- * elements to be duplicates. In the first version, the test is simple
- * equality: the elements in a range <tt>[f, l)</tt> are duplicates if,
- * for every iterator \p i in the range, either <tt>i == f</tt> or else 
- * <tt>*i == *(i-1)</tt>. In the second, the test is an arbitrary 
- * \p BinaryPredicate \p binary_pred: the elements in <tt>[f, l)</tt> are
- * duplicates if, for every iterator \p i in the range, either <tt>i == f</tt>
- * or else <tt>binary_pred(*i, *(i-1))</tt> is \p true.
- *
- * This version of \p unique_copy uses \c operator== to test for equality.
- *
- * The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the input range.
- *  \param last  The end of the input range.
- *  \param result The beginning of the output range.
- *  \return The end of the unique range <tt>[result, result_end)</tt>.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/EqualityComparable.html">Equality Comparable</a>.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a> and
- *          and \p InputIterator's \c value_type is convertible to \c OutputIterator's \c value_type.
- *
- *  \pre The range <tt>[first,last)</tt> and the range <tt>[result, result + (last - first))</tt> shall not overlap.
- *
- *  The following code snippet demonstrates how to use \p unique_copy to
- *  compact a sequence of numbers to remove consecutive duplicates using the \p thrust::host execution
- *  policy for parallelization:
- *
- *  \code
- *  #include <thrust/unique.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  const int N = 7;
- *  int A[N] = {1, 3, 3, 3, 2, 2, 1};
- *  int B[N];
- *  int *result_end = thrust::unique_copy(thrust::host, A, A + N, B);
- *  // The first four values of B are now {1, 3, 2, 1} and (result_end - B) is 4
- *  // Values beyond result_end are unspecified
- *  \endcode
- *
- *  \see unique
- *  \see http://www.sgi.com/tech/stl/unique_copy.html
- */
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator>
-OutputIterator unique_copy(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                           InputIterator first,
-                           InputIterator last,
-                           OutputIterator result);
-
-
-/*! \p unique_copy copies elements from the range <tt>[first, last)</tt>
- * to a range beginning with \p result, except that in a consecutive group
- * of duplicate elements only the first one is copied. The return value
- * is the end of the range to which the elements are copied. 
- *
- * The reason there are two different versions of unique_copy is that there
- * are two different definitions of what it means for a consecutive group of
- * elements to be duplicates. In the first version, the test is simple
- * equality: the elements in a range <tt>[f, l)</tt> are duplicates if,
- * for every iterator \p i in the range, either <tt>i == f</tt> or else 
- * <tt>*i == *(i-1)</tt>. In the second, the test is an arbitrary 
- * \p BinaryPredicate \p binary_pred: the elements in <tt>[f, l)</tt> are
- * duplicates if, for every iterator \p i in the range, either <tt>i == f</tt>
- * or else <tt>binary_pred(*i, *(i-1))</tt> is \p true.
- *
- * This version of \p unique_copy uses \c operator== to test for equality.
- *
- *  \param first The beginning of the input range.
- *  \param last  The end of the input range.
- *  \param result The beginning of the output range.
- *  \return The end of the unique range <tt>[result, result_end)</tt>.
- *
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/EqualityComparable.html">Equality Comparable</a>.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a> and
- *          and \p InputIterator's \c value_type is convertible to \c OutputIterator's \c value_type.
- *
- *  \pre The range <tt>[first,last)</tt> and the range <tt>[result, result + (last - first))</tt> shall not overlap.
- *
- *  The following code snippet demonstrates how to use \p unique_copy to
- *  compact a sequence of numbers to remove consecutive duplicates.
- *
- *  \code
- *  #include <thrust/unique.h>
- *  ...
- *  const int N = 7;
- *  int A[N] = {1, 3, 3, 3, 2, 2, 1};
- *  int B[N];
- *  int *result_end = thrust::unique_copy(A, A + N, B);
- *  // The first four values of B are now {1, 3, 2, 1} and (result_end - B) is 4
- *  // Values beyond result_end are unspecified
- *  \endcode
- *
- *  \see unique
- *  \see http://www.sgi.com/tech/stl/unique_copy.html
- */
-template <typename InputIterator,
-          typename OutputIterator>
-OutputIterator unique_copy(InputIterator first,
-                           InputIterator last,
-                           OutputIterator result);
-
-
-/*! \p unique_copy copies elements from the range <tt>[first, last)</tt>
- * to a range beginning with \p result, except that in a consecutive group
- * of duplicate elements only the first one is copied. The return value
- * is the end of the range to which the elements are copied. 
- *
- * This version of \p unique_copy uses the function object \c binary_pred 
- * to test for equality.
- *
- * The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param first The beginning of the input range.
- *  \param last  The end of the input range.
- *  \param result The beginning of the output range.
- *  \param binary_pred  The binary predicate used to determine equality.
- *  \return The end of the unique range <tt>[result, result_end)</tt>.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/EqualityComparable.html">Equality Comparable</a>.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a> and
- *          and \p InputIterator's \c value_type is convertible to \c OutputIterator's \c value_type.
- *  \tparam BinaryPredicate is a model of <a href="http://www.sgi.com/tech/stl/BinaryPredicate.html">Binary Predicate</a>.
- *
- *  \pre The range <tt>[first,last)</tt> and the range <tt>[result, result + (last - first))</tt> shall not overlap.
- *
- *  The following code snippet demonstrates how to use \p unique_copy to
- *  compact a sequence of numbers to remove consecutive duplicates using the \p thrust::host execution
- *  policy for parallelization:
- *
- *  \code
- *  #include <thrust/unique.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  const int N = 7;
- *  int A[N] = {1, 3, 3, 3, 2, 2, 1};
- *  int B[N];
- *  int *result_end = thrust::unique_copy(thrust::host, A, A + N, B, thrust::equal_to<int>());
- *  // The first four values of B are now {1, 3, 2, 1} and (result_end - B) is 4
- *  // Values beyond result_end are unspecified.
- *  \endcode
- *
- *  \see unique
- *  \see http://www.sgi.com/tech/stl/unique_copy.html
- */
-template<typename DerivedPolicy,
-         typename InputIterator,
-         typename OutputIterator,
-         typename BinaryPredicate>
-OutputIterator unique_copy(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                           InputIterator first,
-                           InputIterator last,
-                           OutputIterator result,
-                           BinaryPredicate binary_pred);
-                       
-
-/*! \p unique_copy copies elements from the range <tt>[first, last)</tt>
- * to a range beginning with \p result, except that in a consecutive group
- * of duplicate elements only the first one is copied. The return value
- * is the end of the range to which the elements are copied. 
- *
- * This version of \p unique_copy uses the function object \c binary_pred 
- * to test for equality.
- *
- *  \param first The beginning of the input range.
- *  \param last  The end of the input range.
- *  \param result The beginning of the output range.
- *  \param binary_pred  The binary predicate used to determine equality.
- *  \return The end of the unique range <tt>[result, result_end)</tt>.
- *
- *  \tparam InputIterator is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *          and \p InputIterator's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/EqualityComparable.html">Equality Comparable</a>.
- *  \tparam OutputIterator is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a> and
- *          and \p InputIterator's \c value_type is convertible to \c OutputIterator's \c value_type.
- *  \tparam BinaryPredicate is a model of <a href="http://www.sgi.com/tech/stl/BinaryPredicate.html">Binary Predicate</a>.
- *
- *  \pre The range <tt>[first,last)</tt> and the range <tt>[result, result + (last - first))</tt> shall not overlap.
- *
- *  The following code snippet demonstrates how to use \p unique_copy to
- *  compact a sequence of numbers to remove consecutive duplicates.
- *
- *  \code
- *  #include <thrust/unique.h>
- *  ...
- *  const int N = 7;
- *  int A[N] = {1, 3, 3, 3, 2, 2, 1};
- *  int B[N];
- *  int *result_end = thrust::unique_copy(A, A + N, B, thrust::equal_to<int>());
- *  // The first four values of B are now {1, 3, 2, 1} and (result_end - B) is 4
- *  // Values beyond result_end are unspecified.
- *  \endcode
- *
- *  \see unique
- *  \see http://www.sgi.com/tech/stl/unique_copy.html
- */
-template <typename InputIterator,
-          typename OutputIterator,
-          typename BinaryPredicate>
-OutputIterator unique_copy(InputIterator first,
-                           InputIterator last,
-                           OutputIterator result,
-                           BinaryPredicate binary_pred);
-
-
-/*! \p unique_by_key is a generalization of \p unique to key-value pairs.
- *  For each group of consecutive keys in the range <tt>[keys_first, keys_last)</tt>
- *  that are equal, \p unique_by_key removes all but the first element of 
- *  the group.  Similarly, the corresponding values in the range
- *  <tt>[values_first, values_first + (keys_last - keys_first))</tt> 
- *  are also removed.
- *
- *  The return value is a \p pair of iterators <tt>(new_keys_last,new_values_last)</tt>
- *  such that no two consecutive elements in the range <tt>[keys_first, new_keys_last)</tt>
- *  are equal.
- *
- *  This version of \p unique_by_key uses \c operator== to test for equality and 
- *  \c project1st to reduce values with equal keys.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param keys_first The beginning of the key range.
- *  \param keys_last  The end of the key range.
- *  \param values_first The beginning of the value range.
- *  \return A pair of iterators at end of the ranges <tt>[key_first, keys_new_last)</tt> and <tt>[values_first, values_new_last)</tt>.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam ForwardIterator1 is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \p ForwardIterator1 is mutable,
- *          and \p ForwardIterator's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/EqualityComparable.html">Equality Comparable</a>.
- *  \tparam ForwardIterator2 is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \p ForwardIterator2 is mutable.
- *
- *  \pre The range <tt>[keys_first, keys_last)</tt> and the range <tt>[values_first, values_first + (keys_last - keys_first))</tt> shall not overlap.
- *
- *  The following code snippet demonstrates how to use \p unique_by_key to
- *  compact a sequence of key/value pairs to remove consecutive duplicates using the \p thrust::host
- *  execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/unique.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  const int N = 7;
- *  int A[N] = {1, 3, 3, 3, 2, 2, 1}; // keys
- *  int B[N] = {9, 8, 7, 6, 5, 4, 3}; // values
- *
- *  thrust::pair<int*,int*> new_end;
- *  new_end = thrust::unique_by_key(thrust::host, A, A + N, B);
- *
- *  // The first four keys in A are now {1, 3, 2, 1} and new_end.first - A is 4.
- *  // The first four values in B are now {9, 8, 5, 3} and new_end.second - B is 4.
- *  \endcode
- *
- *  \see unique
- *  \see unique_by_key_copy
- *  \see reduce_by_key
- */
-template<typename DerivedPolicy,
-         typename ForwardIterator1,
-         typename ForwardIterator2>
-  thrust::pair<ForwardIterator1,ForwardIterator2>
-  unique_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                ForwardIterator1 keys_first, 
-                ForwardIterator1 keys_last,
-                ForwardIterator2 values_first);
-
-
-/*! \p unique_by_key is a generalization of \p unique to key-value pairs.
- *  For each group of consecutive keys in the range <tt>[keys_first, keys_last)</tt>
- *  that are equal, \p unique_by_key removes all but the first element of 
- *  the group.  Similarly, the corresponding values in the range
- *  <tt>[values_first, values_first + (keys_last - keys_first))</tt> 
- *  are also removed.
- *
- *  The return value is a \p pair of iterators <tt>(new_keys_last,new_values_last)</tt>
- *  such that no two consecutive elements in the range <tt>[keys_first, new_keys_last)</tt>
- *  are equal.
- *
- *  This version of \p unique_by_key uses \c operator== to test for equality and 
- *  \c project1st to reduce values with equal keys.
- *
- *  \param keys_first The beginning of the key range.
- *  \param keys_last  The end of the key range.
- *  \param values_first The beginning of the value range.
- *  \return A pair of iterators at end of the ranges <tt>[key_first, keys_new_last)</tt> and <tt>[values_first, values_new_last)</tt>.
- *
- *  \tparam ForwardIterator1 is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \p ForwardIterator1 is mutable,
- *          and \p ForwardIterator's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/EqualityComparable.html">Equality Comparable</a>.
- *  \tparam ForwardIterator2 is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \p ForwardIterator2 is mutable.
- *
- *  \pre The range <tt>[keys_first, keys_last)</tt> and the range <tt>[values_first, values_first + (keys_last - keys_first))</tt> shall not overlap.
- *
- *  The following code snippet demonstrates how to use \p unique_by_key to
- *  compact a sequence of key/value pairs to remove consecutive duplicates.
- *
- *  \code
- *  #include <thrust/unique.h>
- *  ...
- *  const int N = 7;
- *  int A[N] = {1, 3, 3, 3, 2, 2, 1}; // keys
- *  int B[N] = {9, 8, 7, 6, 5, 4, 3}; // values
- *
- *  thrust::pair<int*,int*> new_end;
- *  new_end = thrust::unique_by_key(A, A + N, B);
- *
- *  // The first four keys in A are now {1, 3, 2, 1} and new_end.first - A is 4.
- *  // The first four values in B are now {9, 8, 5, 3} and new_end.second - B is 4.
- *  \endcode
- *
- *  \see unique
- *  \see unique_by_key_copy
- *  \see reduce_by_key
- */
-template <typename ForwardIterator1,
-          typename ForwardIterator2>
-  thrust::pair<ForwardIterator1,ForwardIterator2>
-  unique_by_key(ForwardIterator1 keys_first, 
-                ForwardIterator1 keys_last,
-                ForwardIterator2 values_first);
-
-
-/*! \p unique_by_key is a generalization of \p unique to key-value pairs.
- *  For each group of consecutive keys in the range <tt>[keys_first, keys_last)</tt>
- *  that are equal, \p unique_by_key removes all but the first element of 
- *  the group.  Similarly, the corresponding values in the range
- *  <tt>[values_first, values_first + (keys_last - keys_first))</tt> 
- *  are also removed.
- *
- *  This version of \p unique_by_key uses the function object \c binary_pred
- *  to test for equality and \c project1st to reduce values with equal keys.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param keys_first The beginning of the key range.
- *  \param keys_last  The end of the key range.
- *  \param values_first The beginning of the value range.
- *  \param binary_pred  The binary predicate used to determine equality.
- *  \return The end of the unique range <tt>[first, new_last)</tt>.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam ForwardIterator1 is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \p ForwardIterator1 is mutable,
- *          and \p ForwardIterator's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/EqualityComparable.html">Equality Comparable</a>.
- *  \tparam ForwardIterator2 is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \p ForwardIterator2 is mutable.
- *  \tparam BinaryPredicate is a model of <a href="http://www.sgi.com/tech/stl/BinaryPredicate.html">Binary Predicate</a>.
- *
- *  \pre The range <tt>[keys_first, keys_last)</tt> and the range <tt>[values_first, values_first + (keys_last - keys_first))</tt> shall not overlap.
- *
- *  The following code snippet demonstrates how to use \p unique_by_key to
- *  compact a sequence of key/value pairs to remove consecutive duplicates using the \p thrust::host
- *  execution policy for parallelization:
- *
- *  \code
- *  #include <thrust/unique.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  const int N = 7;
- *  int A[N] = {1, 3, 3, 3, 2, 2, 1}; // keys
- *  int B[N] = {9, 8, 7, 6, 5, 4, 3}; // values
- *
- *  thrust::pair<int*,int*> new_end;
- *  thrust::equal_to<int> binary_pred;
- *  new_end = thrust::unique_by_key(thrust::host, keys, keys + N, values, binary_pred);
- *
- *  // The first four keys in A are now {1, 3, 2, 1} and new_end.first - A is 4.
- *  // The first four values in B are now {9, 8, 5, 3} and new_end.second - B is 4.
- *  \endcode
- *
- *  \see unique
- *  \see unique_by_key_copy
- *  \see reduce_by_key
- */
-template<typename DerivedPolicy,
-         typename ForwardIterator1,
-         typename ForwardIterator2,
-         typename BinaryPredicate>
-  thrust::pair<ForwardIterator1,ForwardIterator2>
-    unique_by_key(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                  ForwardIterator1 keys_first, 
-                  ForwardIterator1 keys_last,
-                  ForwardIterator2 values_first,
-                  BinaryPredicate binary_pred);
-
-
-/*! \p unique_by_key is a generalization of \p unique to key-value pairs.
- *  For each group of consecutive keys in the range <tt>[keys_first, keys_last)</tt>
- *  that are equal, \p unique_by_key removes all but the first element of 
- *  the group.  Similarly, the corresponding values in the range
- *  <tt>[values_first, values_first + (keys_last - keys_first))</tt> 
- *  are also removed.
- *
- *  This version of \p unique_by_key uses the function object \c binary_pred
- *  to test for equality and \c project1st to reduce values with equal keys.
- *
- *  \param keys_first The beginning of the key range.
- *  \param keys_last  The end of the key range.
- *  \param values_first The beginning of the value range.
- *  \param binary_pred  The binary predicate used to determine equality.
- *  \return The end of the unique range <tt>[first, new_last)</tt>.
- *
- *  \tparam ForwardIterator1 is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \p ForwardIterator1 is mutable,
- *          and \p ForwardIterator's \c value_type is a model of <a href="http://www.sgi.com/tech/stl/EqualityComparable.html">Equality Comparable</a>.
- *  \tparam ForwardIterator2 is a model of <a href="http://www.sgi.com/tech/stl/ForwardIterator.html">Forward Iterator</a>,
- *          and \p ForwardIterator2 is mutable.
- *  \tparam BinaryPredicate is a model of <a href="http://www.sgi.com/tech/stl/BinaryPredicate.html">Binary Predicate</a>.
- *
- *  \pre The range <tt>[keys_first, keys_last)</tt> and the range <tt>[values_first, values_first + (keys_last - keys_first))</tt> shall not overlap.
- *
- *  The following code snippet demonstrates how to use \p unique_by_key to
- *  compact a sequence of key/value pairs to remove consecutive duplicates.
- *
- *  \code
- *  #include <thrust/unique.h>
- *  ...
- *  const int N = 7;
- *  int A[N] = {1, 3, 3, 3, 2, 2, 1}; // keys
- *  int B[N] = {9, 8, 7, 6, 5, 4, 3}; // values
- *
- *  thrust::pair<int*,int*> new_end;
- *  thrust::equal_to<int> binary_pred;
- *  new_end = thrust::unique_by_key(keys, keys + N, values, binary_pred);
- *
- *  // The first four keys in A are now {1, 3, 2, 1} and new_end.first - A is 4.
- *  // The first four values in B are now {9, 8, 5, 3} and new_end.second - B is 4.
- *  \endcode
- *
- *  \see unique
- *  \see unique_by_key_copy
- *  \see reduce_by_key
- */
-template <typename ForwardIterator1,
-          typename ForwardIterator2,
-          typename BinaryPredicate>
-  thrust::pair<ForwardIterator1,ForwardIterator2>
-  unique_by_key(ForwardIterator1 keys_first, 
-                ForwardIterator1 keys_last,
-                ForwardIterator2 values_first,
-                BinaryPredicate binary_pred);
-
-
-/*! \p unique_by_key_copy is a generalization of \p unique_copy to key-value pairs.
- *  For each group of consecutive keys in the range <tt>[keys_first, keys_last)</tt>
- *  that are equal, \p unique_by_key_copy copies the first element of the group to
- *  a range beginning with \c keys_result and the corresponding values from the range
- *  <tt>[values_first, values_first + (keys_last - keys_first))</tt> are copied to a range
- *  beginning with \c values_result.
- *
- *  This version of \p unique_by_key_copy uses \c operator== to test for equality and
- *  \c project1st to reduce values with equal keys.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param keys_first The beginning of the input key range.
- *  \param keys_last  The end of the input key range.
- *  \param values_first The beginning of the input value range.
- *  \param keys_result The beginning of the output key range.
- *  \param values_result The beginning of the output value range.
- *  \return A pair of iterators at end of the ranges <tt>[keys_result, keys_result_last)</tt> and <tt>[values_result, values_result_last)</tt>.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *  \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a> and
- *          and \p InputIterator1's \c value_type is convertible to \c OutputIterator1's \c value_type.
- *  \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a> and
- *          and \p InputIterator2's \c value_type is convertible to \c OutputIterator2's \c value_type.
- *
- *  \pre The input ranges shall not overlap either output range.
- *
- *  The following code snippet demonstrates how to use \p unique_by_key_copy to
- *  compact a sequence of key/value pairs and with equal keys using the \p thrust::host execution policy
- *  for parallelization:
- *
- *  \code
- *  #include <thrust/unique.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  const int N = 7;
- *  int A[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys
- *  int B[N] = {9, 8, 7, 6, 5, 4, 3}; // input values
- *  int C[N];                         // output keys
- *  int D[N];                         // output values
- *
- *  thrust::pair<int*,int*> new_end;
- *  new_end = thrust::unique_by_key_copy(thrust::host, A, A + N, B, C, D);
- *
- *  // The first four keys in C are now {1, 3, 2, 1} and new_end.first - C is 4.
- *  // The first four values in D are now {9, 8, 5, 3} and new_end.second - D is 4.
- *  \endcode
- *
- *  \see unique_copy
- *  \see unique_by_key
- *  \see reduce_by_key
- */
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator1,
-         typename OutputIterator2>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    unique_by_key_copy(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                       InputIterator1 keys_first, 
-                       InputIterator1 keys_last,
-                       InputIterator2 values_first,
-                       OutputIterator1 keys_result,
-                       OutputIterator2 values_result);
-
-
-/*! \p unique_by_key_copy is a generalization of \p unique_copy to key-value pairs.
- *  For each group of consecutive keys in the range <tt>[keys_first, keys_last)</tt>
- *  that are equal, \p unique_by_key_copy copies the first element of the group to
- *  a range beginning with \c keys_result and the corresponding values from the range
- *  <tt>[values_first, values_first + (keys_last - keys_first))</tt> are copied to a range
- *  beginning with \c values_result.
- *
- *  This version of \p unique_by_key_copy uses \c operator== to test for equality and
- *  \c project1st to reduce values with equal keys.
- *
- *  \param keys_first The beginning of the input key range.
- *  \param keys_last  The end of the input key range.
- *  \param values_first The beginning of the input value range.
- *  \param keys_result The beginning of the output key range.
- *  \param values_result The beginning of the output value range.
- *  \return A pair of iterators at end of the ranges <tt>[keys_result, keys_result_last)</tt> and <tt>[values_result, values_result_last)</tt>.
- *
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *  \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a> and
- *          and \p InputIterator1's \c value_type is convertible to \c OutputIterator1's \c value_type.
- *  \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a> and
- *          and \p InputIterator2's \c value_type is convertible to \c OutputIterator2's \c value_type.
- *
- *  \pre The input ranges shall not overlap either output range.
- *
- *  The following code snippet demonstrates how to use \p unique_by_key_copy to
- *  compact a sequence of key/value pairs and with equal keys.
- *
- *  \code
- *  #include <thrust/unique.h>
- *  ...
- *  const int N = 7;
- *  int A[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys
- *  int B[N] = {9, 8, 7, 6, 5, 4, 3}; // input values
- *  int C[N];                         // output keys
- *  int D[N];                         // output values
- *
- *  thrust::pair<int*,int*> new_end;
- *  new_end = thrust::unique_by_key_copy(A, A + N, B, C, D);
- *
- *  // The first four keys in C are now {1, 3, 2, 1} and new_end.first - C is 4.
- *  // The first four values in D are now {9, 8, 5, 3} and new_end.second - D is 4.
- *  \endcode
- *
- *  \see unique_copy
- *  \see unique_by_key
- *  \see reduce_by_key
- */
-template <typename InputIterator1,
-          typename InputIterator2,
-          typename OutputIterator1,
-          typename OutputIterator2>
-  thrust::pair<OutputIterator1,OutputIterator2>
-  unique_by_key_copy(InputIterator1 keys_first, 
-                     InputIterator1 keys_last,
-                     InputIterator2 values_first,
-                     OutputIterator1 keys_result,
-                     OutputIterator2 values_result);
-
-
-/*! \p unique_by_key_copy is a generalization of \p unique_copy to key-value pairs.
- *  For each group of consecutive keys in the range <tt>[keys_first, keys_last)</tt>
- *  that are equal, \p unique_by_key_copy copies the first element of the group to
- *  a range beginning with \c keys_result and the corresponding values from the range
- *  <tt>[values_first, values_first + (keys_last - keys_first))</tt> are copied to a range
- *  beginning with \c values_result.
- *
- *  This version of \p unique_by_key_copy uses the function object \c binary_pred
- *  to test for equality and \c project1st to reduce values with equal keys.
- *
- *  The algorithm's execution is parallelized as determined by \p exec.
- *
- *  \param exec The execution policy to use for parallelization.
- *  \param keys_first The beginning of the input key range.
- *  \param keys_last  The end of the input key range.
- *  \param values_first The beginning of the input value range.
- *  \param keys_result The beginning of the output key range.
- *  \param values_result The beginning of the output value range.
- *  \param binary_pred  The binary predicate used to determine equality.
- *  \return A pair of iterators at end of the ranges <tt>[keys_result, keys_result_last)</tt> and <tt>[values_result, values_result_last)</tt>.
- *
- *  \tparam DerivedPolicy The name of the derived execution policy.
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *  \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a> and
- *          and \p InputIterator1's \c value_type is convertible to \c OutputIterator1's \c value_type.
- *  \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a> and
- *          and \p InputIterator2's \c value_type is convertible to \c OutputIterator2's \c value_type.
- *  \tparam BinaryPredicate is a model of <a href="http://www.sgi.com/tech/stl/BinaryPredicate.html">Binary Predicate</a>.
- *
- *  \pre The input ranges shall not overlap either output range.
- *
- *  The following code snippet demonstrates how to use \p unique_by_key_copy to
- *  compact a sequence of key/value pairs and with equal keys using the \p thrust::host execution policy for
- *  parallelization:
- *
- *  \code
- *  #include <thrust/unique.h>
- *  #include <thrust/execution_policy.h>
- *  ...
- *  const int N = 7;
- *  int A[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys
- *  int B[N] = {9, 8, 7, 6, 5, 4, 3}; // input values
- *  int C[N];                         // output keys
- *  int D[N];                         // output values
- *
- *  thrust::pair<int*,int*> new_end;
- *  thrust::equal_to<int> binary_pred;
- *  new_end = thrust::unique_by_key_copy(thrust::host, A, A + N, B, C, D, binary_pred);
- *
- *  // The first four keys in C are now {1, 3, 2, 1} and new_end.first - C is 4.
- *  // The first four values in D are now {9, 8, 5, 3} and new_end.second - D is 4.
- *  \endcode
- *
- *  \see unique_copy
- *  \see unique_by_key
- *  \see reduce_by_key
- */
-template<typename DerivedPolicy,
-         typename InputIterator1,
-         typename InputIterator2,
-         typename OutputIterator1,
-         typename OutputIterator2,
-         typename BinaryPredicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-    unique_by_key_copy(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
-                       InputIterator1 keys_first, 
-                       InputIterator1 keys_last,
-                       InputIterator2 values_first,
-                       OutputIterator1 keys_result,
-                       OutputIterator2 values_result,
-                       BinaryPredicate binary_pred);
-
-
-/*! \p unique_by_key_copy is a generalization of \p unique_copy to key-value pairs.
- *  For each group of consecutive keys in the range <tt>[keys_first, keys_last)</tt>
- *  that are equal, \p unique_by_key_copy copies the first element of the group to
- *  a range beginning with \c keys_result and the corresponding values from the range
- *  <tt>[values_first, values_first + (keys_last - keys_first))</tt> are copied to a range
- *  beginning with \c values_result.
- *
- *  This version of \p unique_by_key_copy uses the function object \c binary_pred
- *  to test for equality and \c project1st to reduce values with equal keys.
- *
- *  \param keys_first The beginning of the input key range.
- *  \param keys_last  The end of the input key range.
- *  \param values_first The beginning of the input value range.
- *  \param keys_result The beginning of the output key range.
- *  \param values_result The beginning of the output value range.
- *  \param binary_pred  The binary predicate used to determine equality.
- *  \return A pair of iterators at end of the ranges <tt>[keys_result, keys_result_last)</tt> and <tt>[values_result, values_result_last)</tt>.
- *
- *  \tparam InputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *  \tparam InputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/InputIterator.html">Input Iterator</a>,
- *  \tparam OutputIterator1 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a> and
- *          and \p InputIterator1's \c value_type is convertible to \c OutputIterator1's \c value_type.
- *  \tparam OutputIterator2 is a model of <a href="http://www.sgi.com/tech/stl/OutputIterator.html">Output Iterator</a> and
- *          and \p InputIterator2's \c value_type is convertible to \c OutputIterator2's \c value_type.
- *  \tparam BinaryPredicate is a model of <a href="http://www.sgi.com/tech/stl/BinaryPredicate.html">Binary Predicate</a>.
- *
- *  \pre The input ranges shall not overlap either output range.
- *
- *  The following code snippet demonstrates how to use \p unique_by_key_copy to
- *  compact a sequence of key/value pairs and with equal keys.
- *
- *  \code
- *  #include <thrust/unique.h>
- *  ...
- *  const int N = 7;
- *  int A[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys
- *  int B[N] = {9, 8, 7, 6, 5, 4, 3}; // input values
- *  int C[N];                         // output keys
- *  int D[N];                         // output values
- *
- *  thrust::pair<int*,int*> new_end;
- *  thrust::equal_to<int> binary_pred;
- *  new_end = thrust::unique_by_key_copy(A, A + N, B, C, D, binary_pred);
- *
- *  // The first four keys in C are now {1, 3, 2, 1} and new_end.first - C is 4.
- *  // The first four values in D are now {9, 8, 5, 3} and new_end.second - D is 4.
- *  \endcode
- *
- *  \see unique_copy
- *  \see unique_by_key
- *  \see reduce_by_key
- */
-template <typename InputIterator1,
-          typename InputIterator2,
-          typename OutputIterator1,
-          typename OutputIterator2,
-          typename BinaryPredicate>
-  thrust::pair<OutputIterator1,OutputIterator2>
-  unique_by_key_copy(InputIterator1 keys_first, 
-                     InputIterator1 keys_last,
-                     InputIterator2 values_first,
-                     OutputIterator1 keys_result,
-                     OutputIterator2 values_result,
-                     BinaryPredicate binary_pred);
-
-
-/*! \} // end stream_compaction
- */
-
-
-} // end namespace thrust
-
-#include <thrust/detail/unique.inl>
-
diff --git a/compat/thrust/version.h b/compat/thrust/version.h
deleted file mode 100644
index 730997eecc..0000000000
--- a/compat/thrust/version.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- *  Copyright 2008-2012 NVIDIA Corporation
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-/*! \file version.h
- *  \brief Compile-time macros encoding Thrust release version
- *
- *         <thrust/version.h> is the only Thrust header that is guaranteed to
- *         change with every thrust release.
- */
-
-#pragma once
-
-#include <thrust/detail/config.h>
-
-//  This is the only thrust header that is guaranteed to 
-//  change with every thrust release.
-//
-//  THRUST_VERSION % 100 is the sub-minor version
-//  THRUST_VERSION / 100 % 1000 is the minor version
-//  THRUST_VERSION / 100000 is the major version
-
-/*! \def THRUST_VERSION
- *  \brief The preprocessor macro \p THRUST_VERSION encodes the version
- *         number of the Thrust library.
- *
- *         <tt>THRUST_VERSION % 100</tt> is the sub-minor version.
- *         <tt>THRUST_VERSION / 100 % 1000</tt> is the minor version.
- *         <tt>THRUST_VERSION / 100000</tt> is the major version.
- */
-#define THRUST_VERSION 100700
-
-/*! \def THRUST_MAJOR_VERSION
- *  \brief The preprocessor macro \p THRUST_MAJOR_VERSION encodes the
- *         major version number of the Thrust library.
- */
-#define THRUST_MAJOR_VERSION     (THRUST_VERSION / 100000)
-
-/*! \def THRUST_MINOR_VERSION
- *  \brief The preprocessor macro \p THRUST_MINOR_VERSION encodes the
- *         minor version number of the Thrust library.
- */
-#define THRUST_MINOR_VERSION     (THRUST_VERSION / 100 % 1000)
-
-/*! \def THRUST_SUBMINOR_VERSION
- *  \brief The preprocessor macro \p THRUST_SUBMINOR_VERSION encodes the
- *         sub-minor version number of the Thrust library.
- */
-#define THRUST_SUBMINOR_VERSION  (THRUST_VERSION % 100)
-
-// Declare these namespaces here for the purpose of Doxygenating them
-
-/*! \namespace thrust
- *  \brief \p thrust is the top-level namespace which contains all Thrust
- *         functions and types.
- */
-namespace thrust
-{
-
-}
-
diff --git a/configure.ac b/configure.ac
index 0cb83db9e1..b06c76e169 100644
--- a/configure.ac
+++ b/configure.ac
@@ -57,6 +57,7 @@ esac
 PTHREAD_FLAGS="-pthread"
 WS2_LIBS=""
 
+
 case $target in
   *-*-mingw*)
     have_win32=true
diff --git a/cpu-miner.c b/cpu-miner.c
index 9e3c3b0ac3..b1cf7d8cee 100644
--- a/cpu-miner.c
+++ b/cpu-miner.c
@@ -7,19 +7,21 @@
  * Software Foundation; either version 2 of the License, or (at your option)
  * any later version.  See COPYING for more details.
  */
-
+ 
 #include "cpuminer-config.h"
 #define _GNU_SOURCE
 
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <stdbool.h>
+#include <stdbool.h> 
 #include <inttypes.h>
 #include <unistd.h>
 #include <sys/time.h>
 #include <time.h>
+#include <math.h>
 #ifdef WIN32
+
 #include <windows.h>
 #else
 #include <errno.h>
@@ -44,7 +46,7 @@
 #pragma comment(lib, "winmm.lib")
 #endif
 
-#define PROGRAM_NAME		"minerd"
+#define PROGRAM_NAME		"ccminer djm edition"
 #define LP_SCANTIME		60
 #define HEAVYCOIN_BLKHDR_SZ		84
 #define MNR_BLKHDR_SZ 80
@@ -130,10 +132,24 @@ typedef enum {
 	ALGO_JACKPOT,
 	ALGO_QUARK,
 	ALGO_ANIME,
+	ALGO_QUBIT,
+	ALGO_FRESH,
 	ALGO_NIST5,
 	ALGO_X11,
 	ALGO_X13,
+	ALGO_X14,
+	ALGO_X15,
+	ALGO_X17,
+	ALGO_WH,
+	ALGO_KECCAK,
+	ALGO_M7,
+	ALGO_LYRA,
+    ALGO_NEOSCRYPT,
+	ALGO_PLUCK,
+	ALGO_DEEP,
+	ALGO_DOOM,
 	ALGO_DMD_GR,
+	ALGO_GOAL,
 } sha256_algos;
 
 static const char *algo_names[] = {
@@ -145,10 +161,24 @@ static const char *algo_names[] = {
 	"jackpot",
 	"quark",
 	"anime",
+	"qubit",
+	"fresh",
 	"nist5",
 	"x11",
 	"x13",
+	"x14",
+	"x15",
+	"x17",
+	"whirlcoin",
+	"keccak",
+	"m7",
+	"lyra2",
+	"neoscrypt",
+	"pluck",
+	"deep",
+	"doom",
 	"dmd-gr",
+	"goalcoin",
 };
 
 bool opt_debug = false;
@@ -157,8 +187,12 @@ bool opt_benchmark = false;
 bool want_longpoll = true;
 bool have_longpoll = false;
 bool want_stratum = true;
+bool have_gbt = true;
 bool have_stratum = false;
+bool allow_getwork = true;
+bool opt_redirect = true;
 static bool submit_old = false;
+static char* lp_id;
 bool use_syslog = false;
 static bool opt_background = false;
 static bool opt_quiet = false;
@@ -170,15 +204,19 @@ static json_t *opt_config;
 static const bool opt_time = true;
 static sha256_algos opt_algo = ALGO_HEAVY;
 static int opt_n_threads = 0;
-static double opt_difficulty = 1; // CH
+static double opt_difficulty = 1.; // CH
 bool opt_trust_pool = false;
 uint16_t opt_vote = 9999;
 static int num_processors;
 int device_map[8] = {0,1,2,3,4,5,6,7}; // CB
 char *device_name[8]; // CB
+float tp_coef[8] = { -1.0};
 static char *rpc_url;
 static char *rpc_userpass;
 static char *rpc_user, *rpc_pass;
+static int pk_script_size;
+static unsigned char pk_script[25];
+static char coinbase_sig[101] = "";
 char *opt_cert;
 char *opt_proxy;
 long opt_proxy_type;
@@ -188,6 +226,13 @@ int longpoll_thr_id = -1;
 int stratum_thr_id = -1;
 struct work_restart *work_restart = NULL;
 static struct stratum_ctx stratum;
+//// m7 stuff
+static unsigned char pblank[1];
+const void* ptr; 
+    size_t sz; 
+uint32_t *m7buf;
+////////////////
+
 
 pthread_mutex_t applog_lock;
 static pthread_mutex_t stats_lock;
@@ -205,7 +250,7 @@ struct option {
 	int *flag;
 	int val;
 };
-#endif
+#endif 
 
 static char const usage[] = "\
 Usage: " PROGRAM_NAME " [OPTIONS]\n\
@@ -219,14 +264,29 @@ Options:\n\
                         jackpot   Jackpot hash\n\
                         quark     Quark hash\n\
                         anime     Animecoin hash\n\
+		        qubit     qubitcoin hash\n\
+		        fresh     freshcoin hash\n\
                         nist5     NIST5 (TalkCoin) hash\n\
                         x11       X11 (DarkCoin) hash\n\
                         x13       X13 (MaruCoin) hash\n\
+                        x14       X14 (MoronCoin) hash\n\
+			x15       X15 (BitBlock) hash\n\
+			x17       X17 (people currency coin) hash\n\
+			whirlcoin  whirlcoin (whirlcoin) hash\n\
+			keccak     keccak256 (maxcoin) hash\n\
+			m7         m7  (crytonite) hash\n\
+			lyra2      lyra2RE  (VertCoin) hash\n\
+            neoscrypt  neoscrypt (FeatherCoin) hash\n\
+            pluck      pluck (SupCoin) hash\n\
+			deep       deep  (deepcoin) hash\n\
+			doom       doomcoin  hash\n\
                         dmd-gr    Diamond-Groestl hash\n\
+			goalcoin   goalcoin hash\n\
   -d, --devices         takes a comma separated list of CUDA devices to use.\n\
                         Device IDs start counting from 0! Alternatively takes\n\
                         string names of your cards like gtx780ti or gt640#2\n\
                         (matching 2nd gt640 in the PC)\n\
+  -F, --throughput     coefficient to apply to the number of threads\n\
   -f, --diff            Divide difficulty by this factor (std is 1) \n\
   -v, --vote=VOTE       block reward vote (for HeavyCoin)\n\
   -m, --trust-pool      trust the max block reward vote (maxvote) sent by the pool\n\
@@ -270,8 +330,8 @@ static char const short_options[] =
 #ifdef HAVE_SYSLOG_H
 	"S"
 #endif
-	"a:c:Dhp:Px:qr:R:s:t:T:o:u:O:Vd:f:mv:";
-
+	"a:c:Dhp:Px:qr:R:s:t:T:o:u:O:Vd:F:f:mv:";
+ 
 static struct option const options[] = {
 	{ "algo", 1, NULL, 'a' },
 #ifndef WIN32
@@ -279,9 +339,13 @@ static struct option const options[] = {
 #endif
 	{ "benchmark", 0, NULL, 1005 },
 	{ "cert", 1, NULL, 1001 },
+	{ "coinbase-addr", 1, NULL, 1013 },
+	{ "coinbase-sig", 1, NULL, 1015 },
 	{ "config", 1, NULL, 'c' },
 	{ "debug", 0, NULL, 'D' },
 	{ "help", 0, NULL, 'h' },
+	{ "no-gbt", 0, NULL, 1011 },
+	{ "no-getwork", 0, NULL, 1010 },
 	{ "no-longpoll", 0, NULL, 1003 },
 	{ "no-stratum", 0, NULL, 1007 },
 	{ "pass", 1, NULL, 'p' },
@@ -303,24 +367,62 @@ static struct option const options[] = {
 	{ "userpass", 1, NULL, 'O' },
 	{ "version", 0, NULL, 'V' },
 	{ "devices", 1, NULL, 'd' },
+	{ "throughput", 1, NULL, 'F'},
 	{ "diff", 1, NULL, 'f' },
 	{ 0, 0, 0, 0 }
 };
 
 struct work {
-	uint32_t data[32];
+
+	union { /* one 128-byte header buffer, viewed as 16-, 32- or 64-bit words */
+		uint16_t data16[64];
+		uint32_t data[32];
+		uint64_t data64[16];
+	};
 	uint32_t target[8];
 	uint32_t maxvote;
-
+	uint32_t hash[8]; /* NOTE(review): not written in this part of the file; presumably the result hash -- confirm */
+int height; /* block height taken from the getblocktemplate reply (gbt_work_decode) */
+char *txs; /* malloc'ed hex string of the transaction list built by gbt_work_decode; NULL for non-GBT work */
+char *workid; /* strdup'ed "workid" of the template, if the server sent one */
 	char job_id[128];
 	size_t xnonce2_len;
 	unsigned char xnonce2[32];
 };
-
+/*
+struct work7 {
+	CBlockHeader 	data;
+	uint32_t	target[8],hash[8];
+};
+*/
 static struct work g_work;
 static time_t g_work_time;
 static pthread_mutex_t g_work_lock;
 
+/* Free the heap strings owned by *w (txs, workid). job_id and xnonce2 are
+ * arrays embedded in struct work and must never be handed to free(). */
+static inline void work_free(struct work *w)
+{
+	free(w->txs);
+	free(w->workid);
+}
+
+/* Duplicate *src into *dest. The struct is copied bytewise, then the two
+ * heap-owned strings (txs, workid) are re-duplicated so that dest does not
+ * alias src's allocations. job_id and xnonce2 are inline arrays, so the
+ * bytewise copy already covers them. */
+static inline void work_copy(struct work *dest, const struct work *src)
+{
+	memcpy(dest, src, sizeof(*dest));
+
+	/* NULL pointers stay NULL: the memcpy above already carried them over */
+	if (src->txs != NULL)
+		dest->txs = strdup(src->txs);
+	if (src->workid != NULL)
+		dest->workid = strdup(src->workid);
+}
+
+
 static bool jobj_binary(const json_t *obj, const char *key,
 			void *buf, size_t buflen)
 {
@@ -346,9 +448,22 @@ static bool jobj_binary(const json_t *obj, const char *key,
 static bool work_decode(const json_t *val, struct work *work)
 {
 	int i;
+	if (opt_algo == ALGO_M7) {
+		// printf("\n work decode \n");
+
+	if (unlikely(!jobj_binary(val, "data", work->data, 122))) {
+		applog(LOG_ERR, "JSON invalid data");
+		goto err_out;
+	}
+	if (unlikely(!jobj_binary(val, "target", work->target, sizeof(work->target)))) {
+		applog(LOG_ERR, "JSON invalid target");
+		goto err_out;
+	}
 	
-	if (unlikely(!jobj_binary(val, "data", work->data, sizeof(work->data)))) {
-		applog(LOG_ERR, "JSON inval data");
+ 
+	} else {
+	if (unlikely(!jobj_binary(val, "data", work->data, (opt_algo==ALGO_NEOSCRYPT)?84:sizeof(work->data)))) {
+		applog(LOG_ERR, "JSON inval data fucked up");
 		goto err_out;
 	}
 	if (unlikely(!jobj_binary(val, "target", work->target, sizeof(work->target)))) {
@@ -360,18 +475,299 @@ static bool work_decode(const json_t *val, struct work *work)
 			work->maxvote = 1024;
 		}
 	} else work->maxvote = 0;
-
-	for (i = 0; i < ARRAY_SIZE(work->data); i++)
+int data_size = (opt_algo == ALGO_NEOSCRYPT) ? 21 : ARRAY_SIZE(work->data);
+	for (i = 0; i < data_size; i++)
 		work->data[i] = le32dec(work->data + i);
 	for (i = 0; i < ARRAY_SIZE(work->target); i++)
 		work->target[i] = le32dec(work->target + i);
-
+	}
 	return true;
 
 err_out:
 	return false;
 }
 
+#define BLOCK_VERSION_MASK 0x000000ff /* low byte of the template "version" that gbt_work_decode compares */
+#define BLOCK_VERSION_CURRENT 3 /* highest masked version accepted without version/force or version/reduce */
+
+/*
+ * Decode a getblocktemplate result object into *work: import or build the
+ * coinbase transaction, hex-encode the transaction list into work->txs,
+ * compute the merkle root and fill work->data, work->target and work->workid.
+ * Also refreshes long-poll state and may clear have_gbt (getwork fallback).
+ * Returns true on success, false (after logging) on bad input or OOM.
+ */
+static bool gbt_work_decode(const json_t *val, struct work *work)
+{
+	int i, n;
+	uint32_t version, curtime, bits;
+	uint32_t prevhash[8], target[8];
+	int cbtx_size, tx_count, tx_size;
+	unsigned char *cbtx = NULL;
+	unsigned char txc_vi[9];
+	unsigned char(*merkle_tree)[32] = NULL;
+	bool coinbase_append = false, submit_coinbase = false;
+	bool version_force = false, version_reduce = false;
+	json_t *tmp, *txa;
+	bool rc = false;
+
+	tmp = json_object_get(val, "mutable");
+	if (tmp && json_is_array(tmp)) {
+		n = json_array_size(tmp);
+		for (i = 0; i < n; i++) {
+			const char *s = json_string_value(json_array_get(tmp, i));
+			if (!s)
+				continue;
+			if (!strcmp(s, "coinbase/append"))
+				coinbase_append = true;
+			else if (!strcmp(s, "submit/coinbase"))
+				submit_coinbase = true;
+			else if (!strcmp(s, "version/force"))
+				version_force = true;
+			else if (!strcmp(s, "version/reduce"))
+				version_reduce = true;
+		}
+	}
+
+	tmp = json_object_get(val, "height");
+	if (!tmp || !json_is_integer(tmp)) {
+		applog(LOG_ERR, "JSON invalid height");
+		goto out;
+	}
+	work->height = json_integer_value(tmp);
+	tmp = json_object_get(val, "version");
+	if (!tmp || !json_is_integer(tmp)) {
+		applog(LOG_ERR, "JSON invalid version");
+		goto out;
+	}
+	version = json_integer_value(tmp);
+	if ((version & BLOCK_VERSION_MASK) > BLOCK_VERSION_CURRENT) {
+		if (version_reduce) {
+			version = (version & ~BLOCK_VERSION_MASK) | BLOCK_VERSION_CURRENT;
+		} else if (!version_force) {
+			applog(LOG_ERR, "Unrecognized block version: %u", version);
+			goto out;
+		}
+	}
+	if (unlikely(!jobj_binary(val, "previousblockhash", prevhash, sizeof(prevhash)))) {
+		applog(LOG_ERR, "JSON invalid previousblockhash");
+		goto out;
+	}
+	tmp = json_object_get(val, "curtime");
+	if (!tmp || !json_is_integer(tmp)) {
+		applog(LOG_ERR, "JSON invalid curtime");
+		goto out;
+	}
+	curtime = json_integer_value(tmp);
+	if (unlikely(!jobj_binary(val, "bits", &bits, sizeof(bits)))) {
+		applog(LOG_ERR, "JSON invalid bits");
+		goto out;
+	}
+
+	/* find count and size of transactions */
+	txa = json_object_get(val, "transactions");
+	if (!txa || !json_is_array(txa)) {
+		applog(LOG_ERR, "JSON invalid transactions");
+		goto out;
+	}
+	tx_count = json_array_size(txa);
+	tx_size = 0;
+	for (i = 0; i < tx_count; i++) {
+		const json_t *tx = json_array_get(txa, i);
+		const char *tx_hex = json_string_value(json_object_get(tx, "data"));
+		if (!tx_hex) {
+			applog(LOG_ERR, "JSON invalid transactions");
+			goto out;
+		}
+		tx_size += strlen(tx_hex) / 2;
+	}
+
+	/* build coinbase transaction */
+	tmp = json_object_get(val, "coinbasetxn");
+	if (tmp) {
+		const char *cbtx_hex = json_string_value(json_object_get(tmp, "data"));
+		cbtx_size = cbtx_hex ? strlen(cbtx_hex) / 2 : 0;
+		cbtx = (unsigned char*)malloc(cbtx_size + 100);
+		if (!cbtx || cbtx_size < 60 || !hex2bin(cbtx, cbtx_hex, cbtx_size)) {
+			applog(LOG_ERR, cbtx ? "JSON invalid coinbasetxn" : "gbt_work_decode OOM");
+			goto out;
+		}
+	} else {
+		int64_t cbvalue;
+		if (!pk_script_size) {
+			if (allow_getwork) {
+				applog(LOG_INFO, "No payout address provided, switching to getwork");
+				have_gbt = false;
+			} else
+				applog(LOG_ERR, "No payout address provided");
+			goto out;
+		}
+		tmp = json_object_get(val, "coinbasevalue");
+		if (!tmp || !json_is_number(tmp)) {
+			applog(LOG_ERR, "JSON invalid coinbasevalue");
+			goto out;
+		}
+		cbvalue = json_is_integer(tmp) ? json_integer_value(tmp) : json_number_value(tmp);
+		cbtx = (unsigned char*) malloc(256);
+		if (!cbtx) {
+			applog(LOG_ERR, "gbt_work_decode OOM");
+			goto out;
+		}
+		le32enc((uint32_t *)cbtx, 1); /* version */
+		cbtx[4] = 1; /* in-counter */
+		memset(cbtx + 5, 0x00, 32); /* prev txout hash */
+		le32enc((uint32_t *)(cbtx + 37), 0xffffffff); /* prev txout index */
+		cbtx_size = 43;
+		/* BIP 34: height in coinbase */
+		for (n = work->height; n; n >>= 8)
+			cbtx[cbtx_size++] = n & 0xff;
+		cbtx[42] = cbtx_size - 43;
+		cbtx[41] = cbtx_size - 42; /* scriptsig length */
+		le32enc((uint32_t *)(cbtx + cbtx_size), 0xffffffff); /* sequence */
+		cbtx_size += 4;
+		cbtx[cbtx_size++] = 1; /* out-counter */
+		le32enc((uint32_t *)(cbtx + cbtx_size), (uint32_t)cbvalue); /* value */
+		le32enc((uint32_t *)(cbtx + cbtx_size + 4), cbvalue >> 32);
+		cbtx_size += 8;
+		cbtx[cbtx_size++] = pk_script_size; /* txout-script length */
+		memcpy(cbtx + cbtx_size, pk_script, pk_script_size);
+		cbtx_size += pk_script_size;
+		le32enc((uint32_t *)(cbtx + cbtx_size), 0); /* lock time */
+		cbtx_size += 4;
+		coinbase_append = true;
+	}
+	if (coinbase_append) {
+		unsigned char xsig[100];
+		int xsig_len = 0;
+		if (*coinbase_sig) {
+			n = strlen(coinbase_sig);
+			if (cbtx[41] + xsig_len + n <= 100) {
+				memcpy(xsig + xsig_len, coinbase_sig, n);
+				xsig_len += n;
+			} else {
+				applog(LOG_WARNING, "Signature does not fit in coinbase, skipping");
+			}
+		}
+		tmp = json_object_get(val, "coinbaseaux");
+		if (tmp && json_is_object(tmp)) {
+			void *iter = json_object_iter(tmp);
+			while (iter) {
+				unsigned char buf[100];
+				const char *s = json_string_value(json_object_iter_value(iter));
+				n = s ? strlen(s) / 2 : 0;
+				if (!s || n > 100 || !hex2bin(buf, s, n)) {
+					applog(LOG_ERR, "JSON invalid coinbaseaux");
+					break;
+				}
+				if (cbtx[41] + xsig_len + n <= 100) {
+					memcpy(xsig + xsig_len, buf, n);
+					xsig_len += n;
+				}
+				iter = json_object_iter_next(tmp, iter);
+			}
+		}
+		if (xsig_len) {
+			unsigned char *ssig_end = cbtx + 42 + cbtx[41];
+			int push_len = cbtx[41] + xsig_len < 76 ? 1 :
+				cbtx[41] + 2 + xsig_len > 100 ? 0 : 2;
+			n = xsig_len + push_len;
+			memmove(ssig_end + n, ssig_end, cbtx_size - 42 - cbtx[41]);
+			cbtx[41] += n;
+			if (push_len == 2)
+				*(ssig_end++) = 0x4c; /* OP_PUSHDATA1 */
+			if (push_len)
+				*(ssig_end++) = xsig_len;
+			memcpy(ssig_end, xsig, xsig_len);
+			cbtx_size += n;
+		}
+	}
+
+	n = varint_encode(txc_vi, 1 + tx_count);
+	work->txs = (char*)malloc(2 * (n + cbtx_size + tx_size) + 1);
+	merkle_tree = (unsigned char(*)[32]) malloc(32 * ((1 + tx_count + 1) & ~1));
+	if (!work->txs || !merkle_tree) {
+		applog(LOG_ERR, "gbt_work_decode OOM");
+		free(work->txs); /* never leave an uninitialised buffer behind */
+		work->txs = NULL;
+		goto out;
+	}
+	abin2hex(work->txs, txc_vi, n);
+	abin2hex(work->txs + 2 * n, cbtx, cbtx_size);
+	/* generate merkle root */
+	sha256d(merkle_tree[0], cbtx, cbtx_size);
+	for (i = 0; i < tx_count; i++) {
+		tmp = json_array_get(txa, i);
+		const char *tx_hex = json_string_value(json_object_get(tmp, "data"));
+		const int tx_size = tx_hex ? strlen(tx_hex) / 2 : 0;
+		unsigned char *tx = (unsigned char*)malloc(tx_size);
+		if ((tx_size && !tx) || !tx_hex || !hex2bin(tx, tx_hex, tx_size)) {
+			applog(LOG_ERR, (tx_size && !tx) ? "gbt_work_decode OOM" : "JSON invalid transactions");
+			free(tx);
+			goto out;
+		}
+		sha256d(merkle_tree[1 + i], tx, tx_size);
+		free(tx); /* the hash is stored; the decoded bytes are no longer needed */
+		if (!submit_coinbase)
+			strcat(work->txs, tx_hex);
+	}
+	n = 1 + tx_count;
+	while (n > 1) {
+		if (n % 2) {
+			memcpy(merkle_tree[n], merkle_tree[n - 1], 32);
+			++n;
+		}
+		n /= 2;
+		for (i = 0; i < n; i++)
+			sha256d(merkle_tree[i], merkle_tree[2 * i], 64);
+	}
+
+	/* assemble block header */
+	work->data[0] = swab32(version);
+	for (i = 0; i < 8; i++)
+		work->data[8 - i] = le32dec(prevhash + i);
+	for (i = 0; i < 8; i++)
+		work->data[9 + i] = be32dec((uint32_t *)merkle_tree[0] + i);
+	work->data[17] = swab32(curtime);
+	work->data[18] = le32dec(&bits);
+	memset(work->data + 19, 0x00, 52);
+	work->data[20] = 0x80000000;
+	work->data[31] = 0x00000280;
+	if (unlikely(!jobj_binary(val, "target", target, sizeof(target)))) {
+		applog(LOG_ERR, "JSON invalid target");
+		goto out;
+	}
+	for (i = 0; i < ARRAY_SIZE(work->target); i++)
+		work->target[7 - i] = be32dec(target + i);
+	tmp = json_object_get(val, "workid");
+	if (tmp) {
+		if (!json_is_string(tmp)) {
+			applog(LOG_ERR, "JSON invalid workid");
+			goto out;
+		}
+		work->workid = strdup(json_string_value(tmp));
+	}
+	/* Long polling */
+	tmp = json_object_get(val, "longpollid");
+	if (want_longpoll && json_is_string(tmp)) {
+		free(lp_id);
+		lp_id = strdup(json_string_value(tmp));
+		if (!have_longpoll) {
+			char *lp_uri;
+			tmp = json_object_get(val, "longpolluri");
+			lp_uri = json_is_string(tmp) ? strdup(json_string_value(tmp)) : rpc_url;
+			have_longpoll = true;
+			tq_push(thr_info[longpoll_thr_id].q, lp_uri);
+		}
+	}
+	rc = true;
+out:
+	free(cbtx);
+	free(merkle_tree);
+	return rc;
+}
+
+
+/*
 static void share_result(int result, const char *reason)
 {
 	char s[345];
@@ -396,23 +792,90 @@ static void share_result(int result, const char *reason)
 	if (opt_debug && reason)
 		applog(LOG_DEBUG, "DEBUG: reject reason: %s", reason);
 }
+*/
+int hashratessize=250; // NOTE(review): not referenced in this part of the file
+double hashrates [250]= { }; // NOTE(review): not referenced in this part of the file
+double totalhashrate = 0.; // running sum of the total hashrate sampled on each share_result() call
+double totalhashsquare =0.; // running sum of the squared samples (used for the standard deviation)
+int hashcomplete=0; // NOTE(review): not referenced in this part of the file
+int hashrow=0; // number of samples accumulated by share_result()
+/* Count one accepted/rejected share and log the totals together with the
+ * current hashrate and its running mean +/- standard deviation. */
+static void share_result(int result, const char *reason)
+{
+	char s[345], s1[345], s2[345];
+	double hashrate = 0.;
+	double averagehashrate, avsquare, variance, stddev;
+	int i;
+
+	pthread_mutex_lock(&stats_lock);
+	for (i = 0; i < opt_n_threads; i++)
+		hashrate += thr_hashrates[i];
+	result ? accepted_count++ : rejected_count++;
+	/* the running sums are shared statistics too: update them under the lock */
+	totalhashrate += hashrate;
+	totalhashsquare += hashrate * hashrate;
+	hashrow++;
+	averagehashrate = totalhashrate / (double)hashrow;
+	avsquare = totalhashsquare / (double)hashrow;
+	pthread_mutex_unlock(&stats_lock);
+
+	/* E[x^2] - E[x]^2 can dip below zero through rounding; clamp it so
+	 * that sqrt() never yields NaN */
+	variance = avsquare - averagehashrate * averagehashrate;
+	stddev = variance > 0. ? sqrt(variance) : 0.;
+
+	sprintf(s, hashrate >= 1e6 ? "%.0f" : "%.2f", 1e-3 * hashrate);
+	sprintf(s1, hashrate >= 1e6 ? "%.0f" : "%.2f", 1e-3 * averagehashrate);
+	sprintf(s2, hashrate >= 1e6 ? "%.0f" : "%.2f", 1e-3 * stddev);
+	applog(LOG_INFO, "accepted: %lu/%lu (%.2f%%), %s kh/s (%s +/- %s) %s",
+			accepted_count,
+			accepted_count + rejected_count,
+			100. * accepted_count / (accepted_count + rejected_count),
+			s,s1,s2, result ? "(yay!!!)" : "(booooo)");
+	if (opt_debug && reason)
+		applog(LOG_DEBUG, "DEBUG: reject reason: %s", reason);
+}
 
 static bool submit_upstream_work(CURL *curl, struct work *work)
 {
 	char *str = NULL;
 	json_t *val, *res, *reason;
+	char data_str[2 * sizeof(work->data) + 1];
 	char s[345];
 	int i;
 	bool rc = false;
 
 	/* pass if the previous hash is not the current previous hash */
+	if (opt_algo == ALGO_M7) {
+		if (memcmp(work->data , g_work.data , 96)) {
+			if (opt_debug)
+				applog(LOG_DEBUG, "DEBUG: stale work detected, discarding");
+			return true;
+		} 
+	} else {
 	if (memcmp(work->data + 1, g_work.data + 1, 32)) {
 		if (opt_debug)
 			applog(LOG_DEBUG, "DEBUG: stale work detected, discarding");
 		return true;
 	}
-
+	}
 	if (have_stratum) {
+		if (opt_algo == ALGO_M7) {
+			
+			uint64_t ntime, nonce;
+			char *ntimestr, *noncestr, *xnonce2str;
+
+			be64enc(&ntime, work->data64[12]);
+			be32enc(&nonce, work->data[29]);
+			ntimestr=bin2hex((const unsigned char *)(&ntime), 8);
+			noncestr=bin2hex((const unsigned char *)(&nonce), 4);
+			xnonce2str = bin2hex(work->xnonce2, work->xnonce2_len);
+			sprintf(s,
+				"{\"method\": \"mining.submit\", \"params\": [\"%s\", \"%s\", \"%s\", \"%s\", \"%s\"], \"id\":4}",
+				rpc_user, work->job_id, xnonce2str, ntimestr, noncestr);
+			free(xnonce2str);
+		} else {
 		uint32_t ntime, nonce;
 		uint16_t nvote;
 		char *ntimestr, *noncestr, *xnonce2str, *nvotestr;
@@ -438,30 +901,101 @@ static bool submit_upstream_work(CURL *curl, struct work *work)
 		free(noncestr);
 		free(xnonce2str);
 		free(nvotestr);
-
+		}
 		if (unlikely(!stratum_send_line(&stratum, s))) {
 			applog(LOG_ERR, "submit_upstream_work stratum_send_line failed");
 			goto out;
 		}
+// gbt
+	}
+	else if (work->txs) {
+		char *req;
+
+		for (i = 0; i < ARRAY_SIZE(work->data); i++)
+			be32enc(work->data + i, work->data[i]);
+		abin2hex(data_str, (unsigned char *)work->data, 80);
+		if (work->workid) {
+			char *params;
+			val = json_object();
+			json_object_set_new(val, "workid", json_string(work->workid));
+			params = json_dumps(val, 0);
+			json_decref(val);
+			req = (char*)malloc(128 + 2 * 80 + strlen(work->txs) + strlen(params));
+			sprintf(req,
+				"{\"method\": \"submitblock\", \"params\": [\"%s%s\", %s], \"id\":1}\r\n",
+				data_str, work->txs, params);
+			free(params);
+		}
+		else {
+			req = (char*)malloc(128 + 2 * 80 + strlen(work->txs));
+			sprintf(req,
+				"{\"method\": \"submitblock\", \"params\": [\"%s%s\"], \"id\":1}\r\n",
+				data_str, work->txs);
+		}
+		val = json_rpc_call2(curl, rpc_url, rpc_userpass, req, NULL, 0);
+		free(req);
+		if (unlikely(!val)) {
+			applog(LOG_ERR, "submit_upstream_work json_rpc_call failed");
+			goto out;
+		}
+
+		res = json_object_get(val, "result");
+		if (json_is_object(res)) {
+			char *res_str;
+			bool sumres = false;
+			void *iter = json_object_iter(res);
+			while (iter) {
+				if (json_is_null(json_object_iter_value(iter))) {
+					sumres = true;
+					break;
+				}
+				iter = json_object_iter_next(res, iter);
+			}
+			res_str = json_dumps(res, 0);
+			share_result(sumres, res_str);
+			free(res_str);
+		}
+		else
+			share_result(json_is_null(res), json_string_value(res));
+
+		json_decref(val);
+///
 	} else {
 
 		/* build hex string */
-
-		if (opt_algo != ALGO_HEAVY && opt_algo != ALGO_MJOLLNIR) {
-			for (i = 0; i < ARRAY_SIZE(work->data); i++)
+		if (opt_algo != ALGO_M7) {
+		if (opt_algo != ALGO_HEAVY && opt_algo != ALGO_MJOLLNIR && opt_algo) {
+			int data_size = (opt_algo == ALGO_NEOSCRYPT) ? 80 : sizeof(work->data);			
+			for (i = 0; i < (data_size >>2); i++)
 				le32enc(work->data + i, work->data[i]);
 			}
-			str = bin2hex((unsigned char *)work->data, sizeof(work->data));
+		int data_size = (opt_algo == ALGO_NEOSCRYPT) ? 80 : sizeof(work->data);
+		str = bin2hex((unsigned char *)work->data,data_size);
 			if (unlikely(!str)) {
 				applog(LOG_ERR, "submit_upstream_work OOM");
 				goto out;
-		}
+			}
+		} else {
+			
+			
+			abin2hex(data_str,(unsigned char *)work->data, 122);
+			if (unlikely(!data_str)) {
+				applog(LOG_ERR, "submit_upstream_work OOM");
+				goto out;
+			}
+		} // M7
+
+			if (opt_algo == ALGO_M7) {
+        sprintf(s,
+			"{\"method\": \"getwork\", \"params\": [ \"%s\" ], \"id\":1}\r\n",
+			data_str);
 
+			} else {
 		/* build JSON-RPC request */
 		sprintf(s,
 			"{\"method\": \"getwork\", \"params\": [ \"%s\" ], \"id\":1}\r\n",
 			str);
-
+			}
 		/* issue JSON-RPC request */
 		val = json_rpc_call(curl, rpc_url, rpc_userpass, s, false, false, NULL);
 		if (unlikely(!val)) {
@@ -486,28 +1020,61 @@ static bool submit_upstream_work(CURL *curl, struct work *work)
 static const char *rpc_req =
 	"{\"method\": \"getwork\", \"params\": [], \"id\":0}\r\n";
 
+#define GBT_CAPABILITIES "[\"coinbasetxn\", \"coinbasevalue\", \"longpoll\", \"workid\"]" /* JSON array advertised in both requests below */
+static const char *gbt_req = /* request body sent by get_upstream_work while have_gbt is set */
+"{\"method\": \"getblocktemplate\", \"params\": [{\"capabilities\": "
+GBT_CAPABILITIES "}], \"id\":0}\r\n";
+static const char *gbt_lp_req = /* sprintf template; %s is filled with lp_id in longpoll_thread */
+"{\"method\": \"getblocktemplate\", \"params\": [{\"capabilities\": "
+GBT_CAPABILITIES ", \"longpollid\": \"%s\"}], \"id\":0}\r\n";
+
+
 static bool get_upstream_work(CURL *curl, struct work *work)
 {
 	json_t *val;
 	bool rc;
+	int err;
 	struct timeval tv_start, tv_end, diff;
-
+start:
 	gettimeofday(&tv_start, NULL);
-	val = json_rpc_call(curl, rpc_url, rpc_userpass, rpc_req,
-			    want_longpoll, false, NULL);
+	val = json_rpc_call2(curl, rpc_url, rpc_userpass,
+		have_gbt ? gbt_req : rpc_req,
+		&err, have_gbt ? JSON_RPC_QUIET_404 : 0);
+//	val = json_rpc_call(curl, rpc_url, rpc_userpass, rpc_req,
+//			    want_longpoll, false, NULL);
 	gettimeofday(&tv_end, NULL);
-
+		
 	if (have_stratum) {
 		if (val)
 			json_decref(val);
 		return true;
 	}
 
+	if (!have_gbt && !allow_getwork) {
+		applog(LOG_ERR, "No usable protocol");
+		if (val)
+			json_decref(val);
+		return false;
+	}
+
+	if (have_gbt && allow_getwork && !val && err == CURLE_OK) {
+		applog(LOG_INFO, "getblocktemplate failed, falling back to getwork");
+		have_gbt = false;
+		goto start;
+	}
+
 	if (!val)
 		return false;
 
+	if (have_gbt) {
+		rc = gbt_work_decode(json_object_get(val, "result"), work);
+		if (!have_gbt) {
+			json_decref(val);
+			goto start;
+		}
+	}	else {
 	rc = work_decode(json_object_get(val, "result"), work);
-
+    }
 	if (opt_debug && rc) {
 		timeval_subtract(&diff, &tv_end, &tv_start);
 		applog(LOG_DEBUG, "DEBUG: got new work in %d ms",
@@ -526,6 +1093,7 @@ static void workio_cmd_free(struct workio_cmd *wc)
 
 	switch (wc->cmd) {
 	case WC_SUBMIT_WORK:
+//		work_free(wc->u.work);
 		free(wc->u.work);
 		break;
 	default: /* do nothing */
@@ -541,6 +1109,7 @@ static bool workio_get_work(struct workio_cmd *wc, CURL *curl)
 	struct work *ret_work;
 	int failures = 0;
 
+
 	ret_work = (struct work*)calloc(1, sizeof(*ret_work));
 	if (!ret_work)
 		return false;
@@ -597,7 +1166,7 @@ static void *workio_thread(void *userdata)
 		applog(LOG_ERR, "CURL initialization failed");
 		return NULL;
 	}
-
+	// printf("workio thread\n");
 	while (ok) {
 		struct workio_cmd *wc;
 
@@ -645,7 +1214,7 @@ static bool get_work(struct thr_info *thr, struct work *work)
 		memset(work->target, 0x00, sizeof(work->target));
 		return true;
 	}
-
+	
 	/* fill out work request message */
 	wc = (struct workio_cmd *)calloc(1, sizeof(*wc));
 	if (!wc)
@@ -653,13 +1222,13 @@ static bool get_work(struct thr_info *thr, struct work *work)
 
 	wc->cmd = WC_GET_WORK;
 	wc->thr = thr;
-
+	
 	/* send work request to workio thread */
 	if (!tq_push(thr_info[work_thr_id].q, wc)) {
 		workio_cmd_free(wc);
 		return false;
 	}
-
+	
 	/* wait for response, a unit of work */
 	work_heap = (struct work *)tq_pop(thr->q, NULL);
 	if (!work_heap)
@@ -668,7 +1237,7 @@ static bool get_work(struct thr_info *thr, struct work *work)
 	/* copy returned work into storage provided by caller */
 	memcpy(work, work_heap, sizeof(*work));
 	free(work_heap);
-
+	// printf("getwork 4\n");
 	return true;
 }
 
@@ -686,7 +1255,8 @@ static bool submit_work(struct thr_info *thr, const struct work *work_in)
 
 	wc->cmd = WC_SUBMIT_WORK;
 	wc->thr = thr;
-	memcpy(wc->u.work, work_in, sizeof(*work_in));
+//	memcpy(wc->u.work, work_in, sizeof(*work_in));
+	work_copy(wc->u.work, work_in);
 
 	/* send solution to workio thread */
 	if (!tq_push(thr_info[work_thr_id].q, wc))
@@ -703,7 +1273,7 @@ static void stratum_gen_work(struct stratum_ctx *sctx, struct work *work)
 {
 	unsigned char merkle_root[64];
 	int i;
-
+	// printf("\n stratum_gen_work\n ");
 	pthread_mutex_lock(&sctx->work_lock);
 
 	strcpy(work->job_id, sctx->job.job_id);
@@ -714,7 +1284,7 @@ static void stratum_gen_work(struct stratum_ctx *sctx, struct work *work)
 	if (opt_algo == ALGO_HEAVY || opt_algo == ALGO_MJOLLNIR)
 		heavycoin_hash(merkle_root, sctx->job.coinbase, (int)sctx->job.coinbase_size);
 	else
-	if (opt_algo == ALGO_FUGUE256 || opt_algo == ALGO_GROESTL)
+	if (opt_algo == ALGO_FUGUE256 || opt_algo == ALGO_GROESTL || opt_algo == ALGO_WH || opt_algo == ALGO_KECCAK )
 		SHA256((unsigned char*)sctx->job.coinbase, sctx->job.coinbase_size, (unsigned char*)merkle_root);
 	else
 		sha256d(merkle_root, sctx->job.coinbase, (int)sctx->job.coinbase_size);
@@ -768,28 +1338,70 @@ static void stratum_gen_work(struct stratum_ctx *sctx, struct work *work)
 		applog(LOG_DEBUG, "DEBUG: job_id='%s' extranonce2=%s ntime=%08x",
 		       work->job_id, xnonce2str, swab32(work->data[17]));
 		free(xnonce2str);
-	}
-
-	if (opt_algo == ALGO_JACKPOT)
+	} 
+	
+	if (opt_algo == ALGO_JACKPOT || opt_algo == ALGO_NEOSCRYPT || opt_algo == ALGO_PLUCK)
 		diff_to_target(work->target, sctx->job.diff / (65536.0 * opt_difficulty));
-	else if (opt_algo == ALGO_FUGUE256 || opt_algo == ALGO_GROESTL || opt_algo == ALGO_DMD_GR)
+	else if (opt_algo == ALGO_FUGUE256 || opt_algo == ALGO_GROESTL || opt_algo == ALGO_DMD_GR || opt_algo == ALGO_FRESH)
 		diff_to_target(work->target, sctx->job.diff / (256.0 * opt_difficulty));
+    else if (opt_algo == ALGO_KECCAK ) // || opt_algo == ALGO_LYRA)
+		diff_to_target(work->target, sctx->job.diff / (128.0 * opt_difficulty));  // seems to work best, minimize rejected share
 	else
 		diff_to_target(work->target, sctx->job.diff / opt_difficulty);
 }
 
+/* Build an M7 work unit from the current stratum job: copy job id and
+ * extranonce2, lay out the 122-byte header in work->data, then derive
+ * work->target from the job difficulty. */
+static void stratum_gen_work_m7(struct stratum_ctx *sctx, struct work *work)
+{
+	unsigned char *hdr = (unsigned char *)work->data;
+	int pos = 0;
+
+	pthread_mutex_lock(&sctx->work_lock);
+	strcpy(work->job_id, sctx->job.job_id);
+	work->xnonce2_len = sctx->xnonce2_size;
+	memcpy(work->xnonce2, sctx->job.xnonce2, sctx->xnonce2_size);
+
+	/* Increment extranonce2, carrying into the next byte on wrap-around */
+	while (pos < (int) sctx->xnonce2_size && !++sctx->job.xnonce2[pos])
+		pos++;
+
+	/* Assemble block header; byte offsets: 0 prevblock, 32 accroot,
+	 * 64 merkleroot, 96 ntime, 104 height, 112 xnonce1+xnonce2, 120 version */
+	memset(hdr, 0, 122);
+	memcpy(hdr, sctx->job.m7prevblock, 32);
+	memcpy(hdr + 32, sctx->job.m7accroot, 32);
+	memcpy(hdr + 64, sctx->job.m7merkleroot, 32);
+	work->data64[12] = be64dec(sctx->job.m7ntime);
+	work->data64[13] = be64dec(sctx->job.m7height);
+	memcpy(hdr + 112, sctx->xnonce1, sctx->xnonce1_size);
+	memcpy(hdr + 112 + sctx->xnonce1_size, work->xnonce2, work->xnonce2_len);
+	work->data16[60] = be16dec(sctx->job.m7version);
+	pthread_mutex_unlock(&sctx->work_lock);
+
+	diff_to_target(work->target, sctx->job.diff / (65536.0* opt_difficulty));
+	if (opt_debug) {
+		char data_str[245], target_str[65];
+		abin2hex(data_str, hdr, 122);
+		applog(LOG_DEBUG, "DEBUG: stratum_gen_work data %s", data_str);
+		abin2hex(target_str, (unsigned char *)work->target, 32);
+		applog(LOG_DEBUG, "DEBUG: stratum_gen_work target %s", target_str);
+	}
+}
+
 static void *miner_thread(void *userdata)
 {
 	struct thr_info *mythr = (struct thr_info *)userdata;
 	int thr_id = mythr->id;
 	struct work work;
 	uint32_t max_nonce;
-	uint32_t end_nonce = 0xffffffffU / opt_n_threads * (thr_id + 1) - 0x20;
+	uint32_t end_nonce = (0xffffffffU) / opt_n_threads * (thr_id + 1) - 0x20;
 	unsigned char *scratchbuf = NULL;
 	char s[16];
-	int i;
-    static int rounds = 0;
 
+    static int rounds = 0;
+	
 	memset(&work, 0, sizeof(work)); // prevent work from being used uninitialized
 
 	/* Set worker threads to nice 19 and then preferentially to SCHED_IDLE
@@ -808,25 +1420,47 @@ static void *miner_thread(void *userdata)
 			       thr_id, thr_id % num_processors);
 		affine_to_cpu(thr_id, thr_id % num_processors);
 	}
-
+	// printf("\n miner threads 2\n");
 	while (1) {
 		unsigned long hashes_done;
+		
 		struct timeval tv_start, tv_end, diff;
 		int64_t max64;
 		int rc;
 
 		if (have_stratum) {
+			
 			while (time(NULL) >= g_work_time + 120)
 				sleep(1);
 			pthread_mutex_lock(&g_work_lock);
-			if (work.data[19] >= end_nonce)
-				stratum_gen_work(&stratum, &g_work);
+       bool nonce_over;
+			if (opt_algo == ALGO_M7) {
+				nonce_over = work.data[29] >= end_nonce;
+			} else {
+				nonce_over = work.data[19] >= end_nonce;
+			}
+		//	printf("nonce over %d\n",nonce_over);
+       if (opt_algo == ALGO_M7) {		       
+			if (work.data[29] >= end_nonce && !memcmp(work.data, g_work.data, 116))
+					stratum_gen_work_m7(&stratum, &g_work);
+				
+			} else {
+				
+				if (work.data[19] >= end_nonce && !memcmp(work.data, g_work.data, 76))		
+					stratum_gen_work(&stratum, &g_work);
+			}
 		} else {
+			int min_scantime = have_longpoll ? LP_SCANTIME : opt_scantime;
 			/* obtain new work from internal workio thread */
 			pthread_mutex_lock(&g_work_lock);
-			if (!have_stratum && (!have_longpoll ||
-					time(NULL) >= g_work_time + LP_SCANTIME*3/4 ||
-					work.data[19] >= end_nonce)) {
+			bool nonce_over;
+			if (opt_algo == ALGO_M7) {
+				nonce_over = work.data[29] >= end_nonce;
+			} else {
+				nonce_over = work.data[19] >= end_nonce;
+			}
+			
+			if (!have_stratum && (time(NULL) - g_work_time >= min_scantime || nonce_over)) {
 				if (unlikely(!get_work(mythr, &g_work))) {
 					applog(LOG_ERR, "work retrieval failed, exiting "
 						"mining thread %d", mythr->id);
@@ -835,16 +1469,33 @@ static void *miner_thread(void *userdata)
 				}
 				g_work_time = have_stratum ? 0 : time(NULL);
 			}
+		}
+///weird stuff
+/*
 			if (have_stratum) {
 				pthread_mutex_unlock(&g_work_lock);
 				continue;
 			}
-		}
+*/		
+		if (opt_algo == ALGO_M7) {
+
+
+			if (memcmp(work.data, g_work.data, 116)) {
+				memcpy(&work, &g_work, sizeof(struct work));
+//				work_free(&work);
+//				work_copy(&work, &g_work);
+				work.data[29] = (0xffffffffU) / opt_n_threads * thr_id;				
+			} else
+				work.data[29]++; // todo
+		} else {
 		if (memcmp(work.data, g_work.data, 76)) {
 			memcpy(&work, &g_work, sizeof(struct work));
+//			work_free(&work);
+//			work_copy(&work, &g_work);
 			work.data[19] = 0xffffffffU / opt_n_threads * thr_id;
 		} else
 			work.data[19]++;
+		}
 		pthread_mutex_unlock(&g_work_lock);
 		work_restart[thr_id].restart = 0;
 
@@ -855,12 +1506,36 @@ static void *miner_thread(void *userdata)
 			max64 = g_work_time + (have_longpoll ? LP_SCANTIME : opt_scantime)
 			      - time(NULL);
 		max64 *= (int64_t)thr_hashrates[thr_id];
-		if (max64 <= 0)
-			max64 = (opt_algo == ALGO_JACKPOT) ? 0x1fffLL : 0xfffffLL;
-		if ((int64_t)work.data[19] + max64 > end_nonce)
-			max_nonce = end_nonce;
-		else
-			max_nonce = (uint32_t)(work.data[19] + max64);
+		
+        if (max64 <= 0) {
+			switch (opt_algo) {
+			case ALGO_JACKPOT:
+				max64 = 0x1fffLL;
+				break;
+            case ALGO_NEOSCRYPT:
+            case ALGO_PLUCK:
+				max64 = 0xfffLL;
+				break;
+			case ALGO_M7:
+				max64 = 0x3ffffLL;
+				break;
+			default: 
+				max64 = 0xfffffLL;
+				break;
+			}
+		}
+		if (opt_algo == ALGO_M7) {
+			if ((int64_t) work.data[29] + max64 > (int64_t) end_nonce)
+				max_nonce = end_nonce;
+			else
+				max_nonce = (uint32_t)(work.data[29] + max64);
+		} else {
+			if ((int64_t) work.data[19] + max64 > (int64_t) end_nonce) {
+				max_nonce = end_nonce;}
+			else {
+				max_nonce = (uint32_t) (work.data[19] + max64);}
+		}
+
 
 		hashes_done = 0;
 		gettimeofday(&tv_start, NULL);
@@ -908,7 +1583,18 @@ static void *miner_thread(void *userdata)
 			rc = scanhash_anime(thr_id, work.data, work.target,
 			                      max_nonce, &hashes_done);
 			break;
-
+		case ALGO_QUBIT:
+			rc = scanhash_qubit(thr_id, work.data, work.target,
+			                      max_nonce, &hashes_done);
+			break;
+        case ALGO_DOOM:
+			rc = scanhash_doom(thr_id, work.data, work.target,
+			                      max_nonce, &hashes_done);
+			break;
+        case ALGO_FRESH:
+			rc = scanhash_fresh(thr_id, work.data, work.target,
+			                      max_nonce, &hashes_done);
+			break;
 		case ALGO_NIST5:
 			rc = scanhash_nist5(thr_id, work.data, work.target,
 			                      max_nonce, &hashes_done);
@@ -923,35 +1609,75 @@ static void *miner_thread(void *userdata)
 			rc = scanhash_x13(thr_id, work.data, work.target,
 			                      max_nonce, &hashes_done);
 			break;
+        case ALGO_X14:
+			rc = scanhash_x14(thr_id, work.data, work.target,
+			                      max_nonce, &hashes_done);
+			break;
+
+        case ALGO_X15:
+			rc = scanhash_x15(thr_id, work.data, work.target,
+			                      max_nonce, &hashes_done);
+			break;
+
+        case ALGO_X17:
+			rc = scanhash_x17(thr_id, work.data, work.target,
+			                      max_nonce, &hashes_done);
+			break;
+        case ALGO_M7:
+
+			rc = scanhash_m7(thr_id,work.data, work.target,max_nonce, &hashes_done);
+			
+			break;
+        case ALGO_LYRA:
+			rc = scanhash_lyra(thr_id,work.data, work.target,max_nonce, &hashes_done);			
+			break;
+
+		case ALGO_PLUCK:
+			rc = scanhash_pluck(thr_id, work.data, work.target, max_nonce, &hashes_done);
+			break;
+
+
+        case ALGO_WH:
+			rc = scanhash_wh(thr_id, work.data, work.target,
+			                      max_nonce, &hashes_done);
+			break;
+        case ALGO_DEEP:
+			rc = scanhash_deep(thr_id, work.data, work.target,
+			                      max_nonce, &hashes_done);
+			break;
+		case ALGO_KECCAK:
+			rc = scanhash_keccak256(thr_id, work.data, work.target,
+			                      max_nonce, &hashes_done);
+			break;
 
 		default:
 			/* should never happen */
 			goto out;
 		}
 
-//        if (opt_benchmark)
-//            if (++rounds == 1) exit(0);
 
 		/* record scanhash elapsed time */
 		gettimeofday(&tv_end, NULL);
+		
+
 		timeval_subtract(&diff, &tv_end, &tv_start);
 		if (diff.tv_usec || diff.tv_sec) {
 			pthread_mutex_lock(&stats_lock);
-			thr_hashrates[thr_id] =
-				hashes_done / (diff.tv_sec + 1e-6 * diff.tv_usec);
+			thr_hashrates[thr_id] =	hashes_done / (diff.tv_sec + 1e-6 * diff.tv_usec);
 			pthread_mutex_unlock(&stats_lock);
 		}
+
 		if (!opt_quiet) {
 			sprintf(s, thr_hashrates[thr_id] >= 1e6 ? "%.0f" : "%.2f",
 				1e-3 * thr_hashrates[thr_id]);
 			applog(LOG_INFO, "GPU #%d: %s, %s khash/s",
 				device_map[thr_id], device_name[thr_id], s);
-//			applog(LOG_INFO, "thread %d: %lu hashes, %s khash/s",
-//				thr_id, hashes_done, s);
 		}
+
 		if (opt_benchmark && thr_id == opt_n_threads - 1) {
 			double hashrate = 0.;
-			for (i = 0; i < opt_n_threads && thr_hashrates[i]; i++)
+			int i;
+			for (i = 0; i < opt_n_threads && thr_hashrates[i]; i++) 
 				hashrate += thr_hashrates[i];
 			if (i == opt_n_threads) {
 				sprintf(s, hashrate >= 1e6 ? "%.0f" : "%.2f", 1e-3 * hashrate);
@@ -1018,11 +1744,22 @@ static void *longpoll_thread(void *userdata)
 	applog(LOG_INFO, "Long-polling activated for %s", lp_url);
 
 	while (1) {
-		json_t *val, *soval;
+		char *req = NULL;
+		json_t *val, *soval, *res;
 		int err;
+		if (have_gbt) {
+			req = (char*)malloc(strlen(gbt_lp_req) + strlen(lp_id) + 1);
+			sprintf(req, gbt_lp_req, lp_id);
+		}
+//		val = json_rpc_call(curl, lp_url, rpc_userpass, rpc_req,
+//				    false, true, &err);
+
+		val = json_rpc_call2(curl, lp_url, rpc_userpass,
+			req ? req : rpc_req, &err,
+			JSON_RPC_LONGPOLL);
+		free(req);
+
 
-		val = json_rpc_call(curl, lp_url, rpc_userpass, rpc_req,
-				    false, true, &err);
 		if (have_stratum) {
 			if (val)
 				json_decref(val);
@@ -1030,15 +1767,27 @@ static void *longpoll_thread(void *userdata)
 		}
 		if (likely(val)) {
 			if (!opt_quiet) applog(LOG_INFO, "LONGPOLL detected new block");
-			soval = json_object_get(json_object_get(val, "result"), "submitold");
+			res = json_object_get(val, "result");
+			soval = json_object_get(res, "submitold");
 			submit_old = soval ? json_is_true(soval) : false;
 			pthread_mutex_lock(&g_work_lock);
+            bool rc;
+			if (have_gbt)
+				rc = gbt_work_decode(res, &g_work);
+			else
+				rc = work_decode(res, &g_work);
+			if (rc) {
+				time(&g_work_time);
+				restart_threads();
+			}
+/*
 			if (work_decode(json_object_get(val, "result"), &g_work)) {
 				if (opt_debug)
 					applog(LOG_DEBUG, "DEBUG: got new work");
 				time(&g_work_time);
 				restart_threads();
 			}
+*/
 			pthread_mutex_unlock(&g_work_lock);
 			json_decref(val);
 		} else {
@@ -1103,7 +1852,7 @@ static void *stratum_thread(void *userdata)
 {
 	struct thr_info *mythr = (struct thr_info *)userdata;
 	char *s;
-
+	// printf("coming here stratum thread");
 	stratum.url = (char*)tq_pop(mythr->q, NULL);
 	if (!stratum.url)
 		goto out;
@@ -1135,7 +1884,11 @@ static void *stratum_thread(void *userdata)
 		if (stratum.job.job_id &&
 		    (strcmp(stratum.job.job_id, g_work.job_id) || !g_work_time)) {
 			pthread_mutex_lock(&g_work_lock);
-			stratum_gen_work(&stratum, &g_work);
+			if (opt_algo == ALGO_M7) {
+				stratum_gen_work_m7(&stratum, &g_work);
+			} else {
+				stratum_gen_work(&stratum, &g_work);
+			}			
 			time(&g_work_time);
 			pthread_mutex_unlock(&g_work_lock);
 			if (stratum.job.clean) {
@@ -1154,8 +1907,13 @@ static void *stratum_thread(void *userdata)
 			applog(LOG_ERR, "Stratum connection interrupted");
 			continue;
 		}
+		if (opt_algo == ALGO_M7) {
+		if (!stratum_handle_method_m7(&stratum, s))
+			stratum_handle_response(s);
+		} else {
 		if (!stratum_handle_method(&stratum, s))
 			stratum_handle_response(s);
+		}
 		free(s);
 	}
 
@@ -1165,7 +1923,7 @@ static void *stratum_thread(void *userdata)
 
 static void show_version_and_exit(void)
 {
-	printf("%s\n%s\n", PACKAGE_STRING, curl_version());
+	 printf("%s\n%s\n", PACKAGE_STRING, curl_version());
 	exit(0);
 }
 
@@ -1351,6 +2109,29 @@ static void parse_arg (int key, char *arg)
 	case 1007:
 		want_stratum = false;
 		break;
+	case 1010:
+		allow_getwork = false;
+		break;
+	case 1011:
+		have_gbt = false;
+		break;
+	case 1013:			/* --coinbase-addr */
+		pk_script_size = address_to_script(pk_script, sizeof(pk_script), arg);
+		if (!pk_script_size) {
+/*
+			fprintf(stderr, "%s: invalid address -- '%s'\n",
+				pname, arg);
+*/
+			show_usage_and_exit(1);
+		}
+		break;
+	case 1015:			/* --coinbase-sig */
+		if (strlen(arg) + 1 > sizeof(coinbase_sig)) {
+//			fprintf(stderr, "%s: coinbase signature too long\n", pname);
+			show_usage_and_exit(1);
+		}
+		strcpy(coinbase_sig, arg);
+		break;
 	case 'S':
 		use_syslog = true;
 		break;
@@ -1378,8 +2159,21 @@ static void parse_arg (int key, char *arg)
 				}
 				pch = strtok (NULL, ",");
 			}
-		}
+		} 
 		break;
+
+    case 'F': /* comma-separated list of values stored into tp_coef[] */
+		{
+			char * pch = strtok (arg,",");
+			int tmp_n_threads = 0;
+            float last = 0;
+			while (pch != NULL && tmp_n_threads < 8) { /* never store past the 8 entries the fill loop below assumes; extra values are ignored */
+				tp_coef[tmp_n_threads++] = last = atof(pch);
+				pch = strtok (NULL, ",");
+			}
+			while (tmp_n_threads < 8) tp_coef[tmp_n_threads++] = last; /* remaining entries repeat the last parsed value (0 if none) */
+		}
+       break;
 	case 'f': // CH - Divisor for Difficulty
 		d = atof(arg);
 		if (d == 0)	/* sanity check */
@@ -1453,11 +2247,11 @@ static void parse_cmdline(int argc, char *argv[])
 		show_usage_and_exit(1);
 	}
 
-	if (opt_algo == ALGO_HEAVY && opt_vote == 9999) {
-		fprintf(stderr, "%s: Heavycoin hash requires block reward vote parameter (see --vote)\n",
-			argv[0]);
-		show_usage_and_exit(1);
-	}
+	//if (opt_algo == ALGO_HEAVY && opt_vote == 9999) {
+	//	fprintf(stderr, "%s: Heavycoin hash requires block reward vote parameter (see --vote)\n",
+	//		argv[0]);
+	//	show_usage_and_exit(1);
+	//}
 
 	parse_config();
 }
@@ -1481,7 +2275,7 @@ static void signal_handler(int sig)
 }
 #endif
 
-#define PROGRAM_VERSION "1.2"
+#define PROGRAM_VERSION "djm34 pluck0.1"
 int main(int argc, char *argv[])
 {
 	struct thr_info *thr;
@@ -1492,16 +2286,20 @@ int main(int argc, char *argv[])
 	SYSTEM_INFO sysinfo;
 #endif
 
-	printf("     *** ccMiner for nVidia GPUs by Christian Buchner and Christian H. ***\n");
-	printf("\t             This is version "PROGRAM_VERSION" (beta)\n");
-	printf("\t  based on pooler-cpuminer 2.3.2 (c) 2010 Jeff Garzik, 2012 pooler\n");
-	printf("\t  based on pooler-cpuminer extension for HVC from\n\t       https://github.com/heavycoin/cpuminer-heavycoin\n");
-	printf("\t\t\tand\n\t       http://hvc.1gh.com/\n");
-	printf("\tCuda additions Copyright 2014 Christian Buchner, Christian H.\n");
-	printf("\t  LTC donation address: LKS1WDKGED647msBQfLBHV3Ls8sveGncnm\n");
-	printf("\t  BTC donation address: 16hJF5mceSojnTD3ZTUDqdRhDyPJzoRakM\n");
-	printf("\t  YAC donation address: Y87sptDEcpLkLeAuex6qZioDbvy1qXZEj4\n");
-
+	 printf("        ***** ccMiner for nVidia GPUs by djm34  *****\n");
+	 printf("\t             This is version "PROGRAM_VERSION" \n");
+	 printf("	based on original ccMiner by Christian Buchner and Christian H. 2014 ***\n");	 
+	 printf("\t  based on pooler-cpuminer 2.3.2 (c) 2010 Jeff Garzik, 2012 pooler\n");
+	 printf("\t  based on pooler-cpuminer extension for HVC from\n\t       https://github.com/heavycoin/cpuminer-heavycoin\n");
+	 printf("\t\t\tand\n\t       http://hvc.1gh.com/\n");
+	 printf("\tCuda additions Copyright 2014 Christian Buchner, Christian H.\n");
+	 printf("\tCuda additions Copyright 2014 DJM34\n");
+	 printf("\t  FTC donation address: 6esbN82brbg3eai8fqzNGm5tmbpiYu3czM\n");
+	 printf("\t  BTC donation address: 1NENYmxwZGHsKFmyjTc5WferTn5VTFb7Ze\n");
+	 printf("\t  VTC donation address: VrLUQmH6Jk5gFii7fASc8vJ7eEgKJqhX11\n");
+    
+	 for (int i = 0; i<8; i++) {tp_coef[i]=-1;}
+    opt_difficulty = 1. ;
 	rpc_user = strdup("");
 	rpc_pass = strdup("");
 
@@ -1510,7 +2308,7 @@ int main(int argc, char *argv[])
 
 	/* parse command line */
 	parse_cmdline(argc, argv);
-
+	
 	cuda_devicenames();
 
 	if (!opt_benchmark && !rpc_url) {
@@ -1579,7 +2377,7 @@ int main(int argc, char *argv[])
 	thr_hashrates = (double *) calloc(opt_n_threads, sizeof(double));
 	if (!thr_hashrates)
 		return 1;
-
+	
 	/* init workio thread info */
 	work_thr_id = opt_n_threads;
 	thr = &thr_info[work_thr_id];
diff --git a/cuda_helper.h b/cuda_helper.h
index 8b0b3f6e90..843ad3a542 100644
--- a/cuda_helper.h
+++ b/cuda_helper.h
@@ -1,7 +1,20 @@
 #ifndef CUDA_HELPER_H
 #define CUDA_HELPER_H
 
-static __device__ unsigned long long MAKE_ULONGLONG(uint32_t LO, uint32_t HI)
+#ifdef __INTELLISENSE__
+#define __launch_bounds__(x)
+#define __byte_perm(x,y,z)
+#endif
+
+static __device__ void LOHI(uint32_t &lo, uint32_t &hi, uint64_t x) // Splits the 64-bit x into two 32-bit words written to lo and hi.
+{
+	asm("{\n\t"
+		"mov.b64 {%0,%1},%2; \n\t" // unpack: {lo,hi} = x (first vector element takes the low 32 bits)
+		"}"
+		: "=r"(lo), "=r"(hi) : "l"(x));
+}
+
+static __device__ unsigned long long oMAKE_ULONGLONG(uint32_t LO, uint32_t HI)
 {
 #if __CUDA_ARCH__ >= 130
     return __double_as_longlong(__hiloint2double(HI, LO));
@@ -10,8 +23,37 @@ static __device__ unsigned long long MAKE_ULONGLONG(uint32_t LO, uint32_t HI)
 #endif
 }
 
+static __device__ unsigned long long MAKE_ULONGLONG(uint32_t LO, uint32_t HI) // Packs two 32-bit words into one 64-bit value; LO is the first (low) element, HI the second.
+{
+uint64_t result;
+asm volatile ("{\n\t"
+	"mov.b64 %0,{%1,%2}; \n\t" // result = {LO,HI}
+		"}"
+		: "=l"(result) : "r"(LO) , "r"(HI));
+return result;
+}
+static __device__ uint32_t HIWORD(uint64_t x) // Returns the second (upper) 32-bit word of x.
+{
+uint32_t result;
+asm volatile ("{\n\t"
+	".reg .u32 xl; \n\t" // scratch register receiving the unused low word
+	"mov.b64 {xl,%0},%1; \n\t" // unpack x; keep only the second element
+		"}"
+		: "=r"(result) : "l"(x));
+return result;
+}
+static __device__ uint32_t LOWORD(uint64_t x) // Returns the first (lower) 32-bit word of x.
+{
+uint32_t result;
+asm volatile ("{\n\t"
+	".reg .u32 xh; \n\t" // scratch register receiving the unused high word
+	"mov.b64 {%0,xh},%1; \n\t" // unpack x; keep only the first element
+		"}"
+		: "=r"(result) : "l"(x));
+return result;
+}
 // das Hi Word aus einem 64 Bit Typen extrahieren
-static __device__ uint32_t HIWORD(const uint64_t &x) {
+static __device__ uint32_t oHIWORD(const uint64_t &x) {
 #if __CUDA_ARCH__ >= 130
 	return (uint32_t)__double2hiint(__longlong_as_double(x));
 #else
@@ -19,13 +61,44 @@ static __device__ uint32_t HIWORD(const uint64_t &x) {
 #endif
 }
 
+#if __CUDA_ARCH__ < 350 
+    // Kepler (Compute 3.0)
+    #define SPH_ROTL32(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
+    #define SPH_ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
+#else
+    // Kepler (Compute 3.5)
+    #define SPH_ROTL32(x, n) __funnelshift_l( (x), (x), (n) )
+    #define SPH_ROTR32(x, n) __funnelshift_r( (x), (x), (n) )
+#endif
+
 // das Hi Word in einem 64 Bit Typen ersetzen
-static __device__ uint64_t REPLACE_HIWORD(const uint64_t &x, const uint32_t &y) {
+static __device__ uint64_t oREPLACE_HIWORD(const uint64_t &x, const uint32_t &y) {
 	return (x & 0xFFFFFFFFULL) | (((uint64_t)y) << 32ULL);
 }
 
+static __device__ uint64_t REPLACE_HIWORD(uint64_t x, uint32_t y) { // Returns x with its upper 32-bit word replaced by y.
+	asm volatile("{\n\t"
+		" .reg .u32 tl,th; \n\t"
+		"mov.b64 {tl,th},%0; \n\t" // split x into low/high words
+		"mov.b64 %0,{tl,%1}; \n\t" // rebuild from the old low word and y
+		"}"
+		: "+l"(x) : "r"(y) );
+return x;
+}
+
+
+static __device__ uint64_t REPLACE_LOWORD(uint64_t x, uint32_t y) { // Returns x with its lower 32-bit word replaced by y.
+        asm volatile ("{\n\t"
+		" .reg .u32 tl,th; \n\t"
+		"mov.b64 {tl,th},%0; \n\t" // split x into low/high words
+		"mov.b64 %0,{%1,th}; \n\t" // rebuild from y and the old high word
+		"}"
+		: "+l"(x) : "r"(y) );
+return x;
+}
+
 // das Lo Word aus einem 64 Bit Typen extrahieren
-static __device__ uint32_t LOWORD(const uint64_t &x) {
+static __device__ uint32_t oLOWORD(const uint64_t &x) {
 #if __CUDA_ARCH__ >= 130
 	return (uint32_t)__double2loint(__longlong_as_double(x));
 #else
@@ -34,24 +107,37 @@ static __device__ uint32_t LOWORD(const uint64_t &x) {
 }
 
 // das Lo Word in einem 64 Bit Typen ersetzen
-static __device__ uint64_t REPLACE_LOWORD(const uint64_t &x, const uint32_t &y) {
+static __device__ uint64_t oREPLACE_LOWORD(const uint64_t &x, const uint32_t &y) {
 	return (x & 0xFFFFFFFF00000000ULL) | ((uint64_t)y);
 }
 
 // Endian Drehung f�r 32 Bit Typen
 static __device__ uint32_t cuda_swab32(uint32_t x)
 {
-	return __byte_perm(x, x, 0x0123);
+	return __byte_perm(x, 0, 0x0123);
 }
 
+static __device__ uint64_t swap2ll(uint32_t lo, uint32_t hi) // Applies cuda_swab32 to each word and packs them into 64 bits, keeping lo as the low word.
+{
+return(MAKE_ULONGLONG(cuda_swab32(lo),cuda_swab32(hi)));
+}
+
+
 // Endian Drehung f�r 64 Bit Typen
 static __device__ uint64_t cuda_swab64(uint64_t x) {
     return MAKE_ULONGLONG(cuda_swab32(HIWORD(x)), cuda_swab32(LOWORD(x)));
 }
+static __device__ uint64_t cuda_swab32ll(uint64_t x) { // Applies cuda_swab32 to each 32-bit half of x separately; unlike cuda_swab64 the halves keep their positions.
+    return MAKE_ULONGLONG(cuda_swab32(LOWORD(x)), cuda_swab32(HIWORD(x)));
+}
+
+
+
+
 
 // diese 64 Bit Rotates werden unter Compute 3.5 (und besser) mit dem Funnel Shifter beschleunigt
 #if __CUDA_ARCH__ >= 350
-__forceinline__ __device__ uint64_t ROTR64(const uint64_t value, const int offset) {
+__forceinline__ __device__ uint64_t oROTR64(const uint64_t value, const int offset) {
     uint2 result;
     if(offset < 32) {
         asm("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(__double2loint(__longlong_as_double(value))), "r"(__double2hiint(__longlong_as_double(value))), "r"(offset));
@@ -63,12 +149,12 @@ __forceinline__ __device__ uint64_t ROTR64(const uint64_t value, const int offse
     return  __double_as_longlong(__hiloint2double(result.y, result.x));
 }
 #else
-#define ROTR64(x, n)        (((x) >> (n)) | ((x) << (64 - (n))))
+#define oROTR64(x, n)        (((x) >> (n)) | ((x) << (64 - (n))))
 #endif
 
 // diese 64 Bit Rotates werden unter Compute 3.5 (und besser) mit dem Funnel Shifter beschleunigt
 #if __CUDA_ARCH__ >= 350
-__forceinline__ __device__ uint64_t ROTL64(const uint64_t value, const int offset) {
+__forceinline__ __device__ uint64_t oROTL64(const uint64_t value, const int offset) {
     uint2 result;
     if(offset >= 32) {
         asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(__double2loint(__longlong_as_double(value))), "r"(__double2hiint(__longlong_as_double(value))), "r"(offset));
@@ -80,7 +166,525 @@ __forceinline__ __device__ uint64_t ROTL64(const uint64_t value, const int offse
     return  __double_as_longlong(__hiloint2double(result.y, result.x));
 }
 #else
+#define oROTL64(x, n)        (((x) << (n)) | ((x) >> (64 - (n))))
+#endif
+
+// Wolf0 Rotate
+#if __CUDA_ARCH__ >= 350
+__forceinline__ __device__ uint64_t ROTR64(const uint64_t x, const int y) // Rotates x right by y bits with two 32-bit funnel shifts; built only when __CUDA_ARCH__ >= 350. Intended for y in 0..63.
+{
+	uint64_t res;
+		
+	asm("{\n\t"
+			".reg .u32 tl,th,vl,vh;\n\t"
+			".reg .pred p;\n\t"
+			"mov.b64 {tl,th}, %1;\n\t" // tl = low word of x, th = high word
+			"shf.r.wrap.b32 vl, tl, th, %2;\n\t" // vl = low 32 bits of (th:tl) >> (y mod 32)
+			"shf.r.wrap.b32 vh, th, tl, %2;\n\t" // vh = low 32 bits of (tl:th) >> (y mod 32)
+			"setp.lt.u32 p, %2, 32;\n\t" // p = (y < 32)
+			"@p mov.b64 %0, {vl,vh};\n\t" // y < 32: vl is the low word, vh the high word
+			"@!p mov.b64 %0, {vh,vl};\n\t" // y >= 32: same shifts, halves exchanged
+			"}" : "=l"(res) : "l"(x) , "r"(y));
+	
+	return res;
+}
+#else
+#define ROTR64(x, n)        (((x) >> (n)) | ((x) << (64 - (n))))
+#endif
+
+#if __CUDA_ARCH__ >= 350
+__forceinline__ __device__ uint64_t ROTL64(const uint64_t x, const int y) // Rotates x left by y bits with two 32-bit funnel shifts; built only when __CUDA_ARCH__ >= 350. Intended for y in 0..63.
+{
+	uint64_t res;
+		
+	asm("{\n\t"
+			".reg .u32 tl,th,vl,vh;\n\t"
+			".reg .pred p;\n\t"
+			"mov.b64 {tl,th}, %1;\n\t" // tl = low word of x, th = high word
+			"shf.l.wrap.b32 vl, tl, th, %2;\n\t" // vl = high 32 bits of (th:tl) << (y mod 32)
+			"shf.l.wrap.b32 vh, th, tl, %2;\n\t" // vh = high 32 bits of (tl:th) << (y mod 32)
+			"setp.lt.u32 p, %2, 32;\n\t" // p = (y < 32)
+			"@!p mov.b64 %0, {vl,vh};\n\t" // y >= 32: vl becomes the low word
+			"@p mov.b64 %0, {vh,vl};\n\t" // y < 32: vh is the low word, vl the high word
+			"}" : "=l"(res) : "l"(x) , "r"(y));
+	
+	return res;
+}
+#else
 #define ROTL64(x, n)        (((x) << (n)) | ((x) >> (64 - (n))))
 #endif
 
+__forceinline__ __device__ uint64_t xor1(uint64_t a, uint64_t b) { // Returns a ^ b, emitted as a single xor.b64.
+	uint64_t result;
+	asm("xor.b64 %0, %1, %2;" : "=l"(result) : "l"(a) ,"l"(b));
+	return result;
+}
+__forceinline__ __device__ uint32_t xor1b(uint32_t a, uint32_t b) { // 32-bit counterpart of xor1: returns a ^ b via xor.b32.
+	uint32_t result;
+	asm("xor.b32 %0, %1, %2;" : "=r"(result) : "r"(a) ,"r"(b));
+	return result;
+}
+
+__forceinline__ __device__ uint64_t xor3(uint64_t a, uint64_t b, uint64_t c) { // Returns a ^ b ^ c (64-bit).
+	uint64_t result;
+	asm("{\n\t"
+		" .reg .u64 t1;\n\t"
+		"xor.b64 t1, %2, %3;\n\t" // t1 = b ^ c
+		"xor.b64 %0, %1, t1;\n\t" // result = a ^ t1
+		"}"
+		: "=l"(result) : "l"(a) ,"l"(b),"l"(c));
+	return result;
+}
+
+__forceinline__ __device__ uint32_t xor3b(uint32_t a, uint32_t b, uint32_t c) { // Returns a ^ b ^ c (32-bit).
+	uint32_t result;
+	asm("{\n\t"
+		" .reg .u32 t1;\n\t"
+		"xor.b32 t1, %2, %3;\n\t" // t1 = b ^ c
+		"xor.b32 %0, %1, t1;\n\t" // result = a ^ t1
+		"}"
+		: "=r"(result) : "r"(a) ,"r"(b),"r"(c));
+	return result;
+}
+__forceinline__ __device__ uint64_t xor5(uint64_t a, uint64_t b, uint64_t c, uint64_t d, uint64_t e) { // Returns a ^ b ^ c ^ d ^ e (64-bit).
+	uint64_t result;
+	asm("{\n\t"
+		" .reg .u64 t1,t2,t3;\n\t"
+		"xor.b64 t1, %1, %2;\n\t" // t1 = a ^ b
+		"xor.b64 t2, %3, %4;\n\t" // t2 = c ^ d
+		"xor.b64 t3, t1, t2;\n\t" // t3 = t1 ^ t2
+		"xor.b64 %0, t3,%5;\n\t" // result = t3 ^ e
+		"}"
+		: "=l"(result) : "l"(a) ,"l"(b), "l"(c), "l"(d) ,"l"(e));
+	return result;
+}
+
+
+
+__forceinline__ __device__ uint64_t xor8(uint64_t a, uint64_t b, uint64_t c, uint64_t d,uint64_t e,uint64_t f,uint64_t g, uint64_t h) { // Returns the XOR of all eight 64-bit arguments.
+	uint64_t result;
+	asm volatile ("xor.b64 %0, %1, %2;" : "=l"(result) : "l"(g) ,"l"(h)); // start with g ^ h
+	asm volatile ("xor.b64 %0, %0, %1;" : "+l"(result) : "l"(f)); // then fold in f, e, d, c, b, a one at a time
+	asm volatile ("xor.b64 %0, %0, %1;" : "+l"(result) : "l"(e));
+	asm volatile ("xor.b64 %0, %0, %1;" : "+l"(result) : "l"(d));
+	asm volatile ("xor.b64 %0, %0, %1;" : "+l"(result) : "l"(c));
+	asm volatile ("xor.b64 %0, %0, %1;" : "+l"(result) : "l"(b));
+	asm volatile ("xor.b64 %0, %0, %1;" : "+l"(result) : "l"(a));
+	return result;
+}
+
+__forceinline__ __device__ uint32_t xandx(uint32_t a, uint32_t b, uint32_t c) // Returns ((b ^ c) & a) ^ c: per bit, b where a is 1 and c where a is 0.
+{
+	uint32_t result;
+	asm("{\n\t"
+		".reg .u32 m,n;\n\t"
+		"xor.b32 m, %2,%3;\n\t" // m = b ^ c
+		"and.b32 n, m,%1;\n\t" // n = m & a
+		"xor.b32 %0, n,%3;\n\t" // result = n ^ c
+		"}\n\t"
+		: "=r"(result) : "r"(a), "r"(b), "r"(c));
+	return result;
+
+}
+__forceinline__ __device__ uint64_t xandx64(uint64_t a, uint64_t b, uint64_t c) // 64-bit xandx: returns ((b ^ c) & a) ^ c, i.e. per bit b where a is 1, else c.
+{
+	uint64_t result;
+	asm("{\n\t"
+		".reg .u64 m,n;\n\t"
+		"xor.b64 m, %2,%3;\n\t" // m = b ^ c
+		"and.b64 n, m,%1;\n\t" // n = m & a
+		"xor.b64 %0, n,%3;\n\t" // result = n ^ c
+		"}\n\t"
+		: "=l"(result) : "l"(a), "l"(b), "l"(c));
+	return result;
+
+}
+
+__forceinline__ __device__ uint64_t xornot64(uint64_t a, uint64_t b, uint64_t c) // Returns (a | ~b) ^ c.
+{
+	uint64_t result;
+	asm("{\n\t"
+		".reg .u64 m,n;\n\t"
+		"not.b64 m,%2; \n\t" // m = ~b
+		"or.b64 n, %1,m;\n\t" // n = a | m
+		"xor.b64 %0, n,%3;\n\t" // result = n ^ c
+		"}\n\t"
+		: "=l"(result) : "l"(a), "l"(b), "l"(c));
+	return result;
+
+}
+
+__forceinline__ __device__ void chi(uint64_t &s0, uint64_t &s1, uint64_t &s2, uint64_t &s3, uint64_t &s4) // In place: s[i] ^= ~s[i+1] & s[i+2] (indices mod 5), all right-hand sides taken from the incoming values; same form as the Keccak chi row step.
+{
+	asm("{\n\t"
+		".reg .u64 m0,m1,m2,m3,m4;\n\t"
+		".reg .u64 z0,z1,z2,z3,z4;\n\t"
+		"not.b64 m0,%0; \n\t" // m_i = ~s_i
+		"not.b64 m1,%1; \n\t"
+		"not.b64 m2,%2; \n\t"
+		"not.b64 m3,%3; \n\t"
+		"not.b64 m4,%4; \n\t"
+		"and.b64 z1,m1,%2;\n\t" // z1 = ~s1 & s2
+		"and.b64 z2,m2,%3;\n\t" // z2 = ~s2 & s3
+		"and.b64 z3,m3,%4;\n\t" // z3 = ~s3 & s4
+		"and.b64 z4,m4,%0;\n\t" // z4 = ~s4 & s0
+		"and.b64 z0,m0,%1;\n\t" // z0 = ~s0 & s1
+		"xor.b64 %0,%0,z1;\n\t" // every z is computed before the first write-back below
+		"xor.b64 %1,%1,z2;\n\t"
+		"xor.b64 %2,%2,z3;\n\t"
+		"xor.b64 %3,%3,z4;\n\t"
+		"xor.b64 %4,%4,z0;\n\t"		// s4 ^= z0
+		"}\n\t"
+		: "+l"(s0),"+l"(s1),"+l"(s2),"+l"(s3),"+l"(s4));
+}
+__forceinline__ __device__ uint64_t xornt64(uint64_t a, uint64_t b, uint64_t c) // Returns a ^ (b | ~c).
+{
+	uint64_t result;
+	asm("{\n\t"
+		".reg .u64 m,n;\n\t"
+		"not.b64 m,%3; \n\t" // m = ~c
+		"or.b64 n, %2,m;\n\t" // n = b | m
+		"xor.b64 %0, %1,n;\n\t" // result = a ^ n
+		"}\n\t"
+		: "=l"(result) : "l"(a), "l"(b), "l"(c));
+	return result;
+
+}
+__forceinline__ __device__ uint64_t sph_t64(uint64_t x) // ANDs x with an all-ones 64-bit mask, so the value comes back unchanged.
+{
+uint64_t result;
+ asm("{\n\t"
+    "and.b64 %0,%1,0xFFFFFFFFFFFFFFFF;\n\t" // result = x & 0xFFFFFFFFFFFFFFFF
+    "}\n\t"
+	: "=l"(result) : "l"(x));
+	return result;
+}
+__forceinline__ __device__ uint32_t sph_t32(uint32_t x) // ANDs x with an all-ones 32-bit mask, so the value comes back unchanged.
+{
+uint32_t result;
+ asm("{\n\t"
+    "and.b32 %0,%1,0xFFFFFFFF;\n\t" // result = x & 0xFFFFFFFF
+    "}\n\t"
+	: "=r"(result) : "r"(x));
+	return result;
+}
+
+__forceinline__ __device__ uint64_t andor(uint64_t a, uint64_t b, uint64_t c) // Returns (a & b) | ((a | b) & c): per bit, 1 when at least two of a, b, c are 1.
+{
+	uint64_t result;
+	asm("{\n\t"
+		".reg .u64 m,n,o;\n\t"
+		"and.b64 m,  %1, %2;\n\t" // m = a & b
+		" or.b64 n,  %1, %2;\n\t" // n = a | b
+		"and.b64 o,   n, %3;\n\t" // o = n & c
+		" or.b64 %0,  m, o ;\n\t" // result = m | o
+		"}\n\t"
+		: "=l"(result) : "l"(a), "l"(b), "l"(c));
+	return result;
+
+}
+__forceinline__ __device__ uint32_t andor32(uint32_t a, uint32_t b, uint32_t c) // 32-bit andor: returns (a & b) | ((a | b) & c), the per-bit majority of a, b, c.
+{
+	uint32_t result;
+	asm("{\n\t"
+		".reg .u32 m,n,o;\n\t"
+		"and.b32 m,  %1, %2;\n\t" // m = a & b
+		" or.b32 n,  %1, %2;\n\t" // n = a | b
+		"and.b32 o,   n, %3;\n\t" // o = n & c
+		" or.b32 %0,  m, o ;\n\t" // result = m | o
+		"}\n\t"
+		: "=r"(result) : "r"(a), "r"(b), "r"(c));
+	return result;
+
+}
+__forceinline__ __device__ uint64_t shr_t64(uint64_t x,uint32_t n) // Returns x shifted right by n bits using shr.b64 (untyped, so vacated bits are zero).
+{
+uint64_t result;
+asm("{\n\t"
+	"shr.b64 %0,%1,%2;\n\t" // result = x >> n
+    "}\n\t"
+	: "=l"(result) : "l"(x), "r"(n));
+	return result;
+}
+__forceinline__ __device__ uint64_t shl_t64(uint64_t x,uint32_t n) // Returns x shifted left by n bits using shl.b64.
+{
+uint64_t result;
+asm("{\n\t"
+	"shl.b64 %0,%1,%2;\n\t" // result = x << n
+    "}\n\t"
+	: "=l"(result) : "l"(x), "r"(n));
+	return result;
+}
+__forceinline__ __device__ uint32_t shr_t32(uint32_t x,uint32_t n) // Returns x shifted right by n bits using shr.b32 (untyped, so vacated bits are zero).
+{
+uint32_t result;
+asm("{\n\t"
+	"shr.b32 %0,%1,%2;\n\t" // result = x >> n
+    "}\n\t"
+	: "=r"(result) : "r"(x), "r"(n));
+	return result;
+}
+__forceinline__ __device__ uint32_t shl_t32(uint32_t x,uint32_t n) // Returns x shifted left by n bits using shl.b32.
+{
+uint32_t result;
+asm("{\n\t"
+	"shl.b32 %0,%1,%2;\n\t" // result = x << n
+    "}\n\t"
+	: "=r"(result) : "r"(x), "r"(n));
+	return result;
+}
+__forceinline__ __device__ void and64(uint64_t &d,uint64_t a,uint64_t b) // Stores a & b into d.
+{
+asm("and.b64 %0,%1,%2;" : "=l"(d) : "l"(a), "l"(b));
+}
+
+__forceinline__ __device__ void sbox(uint32_t &a, uint32_t &b,uint32_t &c,uint32_t &d) // In-place bitwise substitution over the four words a,b,c,d using and/or/xor/not; statement order matters because each step reads earlier results.
+{
+uint32_t t; // scratch word
+t = a; // keep the incoming a
+asm("and.b32 %0,%0,%1;" : "+r"(a) : "r"(c)); // a &= c
+asm("xor.b32 %0,%0,%1;" : "+r"(a) : "r"(d)); // a ^= d
+asm("xor.b32 %0,%0,%1;" : "+r"(c) : "r"(b)); // c ^= b
+asm("xor.b32 %0,%0,%1;" : "+r"(c) : "r"(a)); // c ^= a
+asm( "or.b32 %0,%0,%1;" : "+r"(d) : "r"(t)); // d |= t
+asm("xor.b32 %0,%0,%1;" : "+r"(d) : "r"(b)); // d ^= b
+asm("xor.b32 %0,%0,%1;" : "+r"(t) : "r"(c)); // t ^= c
+b=d; // b takes the intermediate d
+asm( "or.b32 %0,%0,%1;" : "+r"(d) : "r"(t)); // d |= t
+asm("xor.b32 %0,%0,%1;" : "+r"(d) : "r"(a)); // d ^= a
+asm("and.b32 %0,%0,%1;" : "+r"(a) : "r"(b)); // a &= b
+asm("xor.b32 %0,%0,%1;" : "+r"(t) : "r"(a)); // t ^= a
+asm("xor.b32 %0,%0,%1;" : "+r"(b) : "r"(d)); // b ^= d
+asm("xor.b32 %0,%0,%1;" : "+r"(b) : "r"(t)); // b ^= t
+a=c; // final permutation of the working values into the outputs
+c=b;
+b=d;
+asm("not.b32 %0,%1;" : "=r"(d) : "r"(t)); // d = ~t
+}
+
+
+
+
+__forceinline__ __device__ void muladd128(uint64_t &u,uint64_t &v,uint64_t a, uint64_t b,uint64_t &c,uint64_t &e) // Computes the 128-bit value a*b + c + e; u receives the upper 64 bits, v the lower 64 bits. c and e are only read.
+{
+
+	asm("{\n\t"
+		".reg .b32 al,ah,bl,bh; \n\t"
+		".reg .b32 x1,x2,x3,x4; \n\t" // x1..x4: 128-bit accumulator, x1 least significant
+		".reg .b32 cl,ch,el,eh; \n\t"
+		"mov.b64 {al,ah},%2; \n\t" // split the four 64-bit inputs into 32-bit halves
+		"mov.b64 {bl,bh},%3; \n\t"
+		"mov.b64 {cl,ch},%4; \n\t"
+		"mov.b64 {el,eh},%5; \n\t"
+		"add.cc.u32 x1,cl,el; \n\t" // x3:x2:x1 = c + e, carry kept in x3
+		"addc.cc.u32 x2,ch,eh; \n\t"
+		"addc.u32 x3,0,0; \n\t"
+		"mad.lo.cc.u32 x1,bl,al,x1; \n\t" // add bl*al at word 0
+		"madc.hi.cc.u32 x2,bl,al,x2; \n\t"
+		"addc.u32      x3,x3,0;         \n\t"
+		"mad.lo.cc.u32    x2,bh,al,x2; \n\t" // add bh*al at word 1
+		"madc.hi.cc.u32   x3,bh,al,x3;    \n\t"
+		"addc.u32         x4,0,0;         \n\t" // x4 starts as the carry out of word 2
+		"mad.lo.cc.u32  x2,bl,ah,x2;  \n\t" // add bl*ah at word 1
+		"madc.hi.cc.u32 x3,bl,ah,x3;  \n\t"
+		"addc.u32       x4,x4,0;         \n\t"
+		"mad.lo.cc.u32  x3,bh,ah,x3;   \n\t" // add bh*ah at word 2
+		"madc.hi.u32    x4,bh,ah,x4;   \n\t"
+		"mov.b64 %1,{x1,x2}; \n\t" // v = low 64 bits
+		"mov.b64 %0,{x3,x4}; \n\t" // u = high 64 bits
+		"}\n\t"
+		: "=l"(u), "=l"(v) : "l"(a), "l"(b), "l"(c), "l"(e));
+
+}
+
+
+
+__forceinline__ __device__ uint64_t mul(uint64_t a,uint64_t b) // Returns the low 64 bits of a*b.
+{
+uint64_t result;
+asm("{\n\t"
+	"mul.lo.u64 %0,%1,%2; \n\t"    // result = low half of the 128-bit product
+     "}\n\t"
+	: "=l"(result) : "l"(a) , "l"(b));
+return result;
+}
+
+__device__ __forceinline__ uint64_t shfl(uint64_t x, int lane) // 64-bit shuffle: returns the value of x read from `lane` via two 32-bit __shfl calls.
+{
+uint32_t lo,hi;
+asm volatile("mov.b64 {%0,%1},%2;" : "=r"(lo), "=r"(hi) : "l"(x)); // split x into 32-bit halves
+lo = __shfl(lo, lane); // NOTE(review): __shfl is the legacy mask-less intrinsic; toolchains targeting Volta or newer require __shfl_sync
+hi = __shfl(hi, lane);
+asm volatile("mov.b64 %0,{%1,%2};" : "=l"(x) : "r"(lo) , "r"(hi)); // repack the two received halves
+return x;
+}
+
+
+///uint2 method
+
+#if  __CUDA_ARCH__ >= 350 
+__inline__ __device__ uint2 ROR2(const uint2 a, const int offset) { // Rotates the 64-bit value {a.x = low, a.y = high} right by offset bits (funnel-shift variant, __CUDA_ARCH__ >= 350). Intended for offset in 0..63.
+	uint2 result;
+	if (offset < 32) {
+		asm("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(a.x), "r"(a.y), "r"(offset)); // low word from (a.y:a.x)
+		asm("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(result.y) : "r"(a.y), "r"(a.x), "r"(offset)); // high word from (a.x:a.y)
+	}
+	else {
+		asm("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(a.y), "r"(a.x), "r"(offset)); // wrap mode uses offset mod 32, so the operands are exchanged instead
+		asm("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(result.y) : "r"(a.x), "r"(a.y), "r"(offset));
+		
+	}
+	return result;
+}
+#else
+__inline__ __device__ uint2 ROR2(const uint2 v, const int a) { // Fallback (__CUDA_ARCH__ < 350): rotates {v.x = low, v.y = high} right by a bits using plain shifts.
+		uint2 result;
+        int n = 64 -a; // rotate right by a is done as rotate left by n = 64 - a
+		if (n <= 32) { // NOTE(review): for n == 32 (a == 32) this branch evaluates `<< 32` on 32-bit operands - confirm the targets give the intended result
+			result.y = ((v.y << (n)) | (v.x >> (32 - n)));
+			result.x = ((v.x << (n)) | (v.y >> (32 - n)));
+		}
+		else { // n in 33..64: words exchanged, then shifted by n - 32
+			result.y = ((v.x << (n - 32)) | (v.y >> (64 - n)));
+			result.x = ((v.y << (n - 32)) | (v.x >> (64 - n)));
+		}
+		return result;
+	}
+#endif
+
+
+#if  __CUDA_ARCH__ >= 350 
+__inline__ __device__ uint2 ROL2(const uint2 a, const int offset) { // Rotates the 64-bit value {a.x = low, a.y = high} left by offset bits (funnel-shift variant, __CUDA_ARCH__ >= 350). Intended for offset in 0..63.
+	uint2 result;
+	if (offset >= 32) {
+		asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(a.x), "r"(a.y), "r"(offset)); // wrap mode uses offset mod 32; upper word of (a.y:a.x) << n becomes the low word
+		asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.y) : "r"(a.y), "r"(a.x), "r"(offset));        // upper word of (a.x:a.y) << n becomes the high word
+	}
+	else {
+		asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(a.y), "r"(a.x), "r"(offset)); // low word = (a.x << n) | (a.y >> (32 - n))
+		asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.y) : "r"(a.x), "r"(a.y), "r"(offset)); // high word = (a.y << n) | (a.x >> (32 - n))
+	}
+return result;
+}
+#else
+__inline__ __device__ uint2 ROL2(const uint2 v, const int n) { // Fallback (__CUDA_ARCH__ < 350): rotates {v.x = low, v.y = high} left by n bits using plain shifts.
+		uint2 result;
+		if (n == 32) {result.x = v.y;result.y=v.x;} // exact word swap; must not fall through to the shifts below
+		else if (n < 32) { // NOTE(review): n == 0 still evaluates `>> 32` here - confirm callers never pass 0
+			result.y = ((v.y << (n)) | (v.x >> (32 - n)));
+			result.x = ((v.x << (n)) | (v.y >> (32 - n)));
+		}
+		else { // n in 33..63: words exchanged, then shifted by n - 32
+			result.y = ((v.x << (n - 32)) | (v.y >> (64 - n)));
+			result.x = ((v.y << (n - 32)) | (v.x >> (64 - n)));
+		}
+		return result;
+	}
+#endif
+
+static __forceinline__ __device__ uint64_t devectorize(uint2 v) { return MAKE_ULONGLONG(v.x, v.y); } // Packs a uint2 into 64 bits: v.x is the low word, v.y the high word.
+static __forceinline__ __device__ uint2 vectorize(uint64_t v) { // Splits a 64-bit value into a uint2 (inverse of devectorize).
+	uint2 result;
+	LOHI(result.x, result.y, v); // x = low word, y = high word
+	return result;
+}
+
+static __forceinline__ __device__ uint2 operator^ (uint2 a, uint2 b) { return make_uint2(a.x ^ b.x, a.y ^ b.y); } // component-wise XOR
+static __forceinline__ __device__ uint2 operator& (uint2 a, uint2 b) { return make_uint2(a.x & b.x, a.y & b.y); } // component-wise AND
+static __forceinline__ __device__ uint2 operator| (uint2 a, uint2 b) { return make_uint2(a.x | b.x, a.y | b.y); } // component-wise OR
+static __forceinline__ __device__ uint2 operator~ (uint2 a) { return make_uint2(~a.x, ~a.y); } // component-wise complement
+static __forceinline__ __device__ void operator^= (uint2 &a, uint2 b) { a = a ^ b; } // in-place XOR built on operator^
+static __forceinline__ __device__ uint2 operator+ (uint2 a, uint2 b) // Adds two uint2 values as 64-bit integers (x = low word, y = high word); the carry out of the high word is dropped.
+{
+	uint2 result;
+	asm("{\n\t"
+		"add.cc.u32 %0,%2,%4; \n\t" // low = a.x + b.x, carry recorded
+		"addc.u32 %1,%3,%5;   \n\t" // high = a.y + b.y + carry
+		"}\n\t"
+		: "=r"(result.x), "=r"(result.y) : "r"(a.x), "r"(a.y), "r"(b.x), "r"(b.y));
+	return result;
+}
+static __forceinline__ __device__ void operator+= (uint2 &a, uint2 b) { a = a + b; } // in-place form of the 64-bit add above
+
+static __forceinline__ __device__ uint2 operator* (uint2 a, uint2 b)
+{ // Multiplies two uint2 values as 64-bit integers (x = low word, y = high word) and
+	// returns the low 64 bits of the product, like mul.lo.u64 / the uint64_t "*" operator.
+	uint2 result;
+	asm("{\n\t"
+		"mul.lo.u32        %0,%2,%4;  \n\t" // low  = lo(a.x * b.x)
+		"mul.hi.u32        %1,%2,%4;  \n\t" // high = hi(a.x * b.x)
+		"mad.lo.u32       %1,%3,%4,%1; \n\t" // high += lo(a.y * b.x); overflow past bit 63 is discarded
+		"mad.lo.u32       %1,%2,%5,%1; \n\t" // high += lo(a.x * b.y); a.y * b.y only affects bits >= 64
+		"}\n\t"
+		: "=r"(result.x), "=r"(result.y) : "r"(a.x), "r"(a.y), "r"(b.x), "r"(b.y));
+	return result;
+}
+#if  __CUDA_ARCH__ >= 350 
+static __forceinline__ __device__ uint2 shiftl2(uint2 a, int offset) // Logical left shift of the 64-bit value {a.x = low, a.y = high} by offset bits (offset >= 0).
+{
+	uint2 result;
+	if (offset<32) {
+		asm("{\n\t"
+			"shf.l.clamp.b32 %1,%2,%3,%4; \n\t" // high = (a.y << offset) | (a.x >> (32 - offset))
+			"shl.b32 %0,%2,%4; \n\t" // low = a.x << offset
+			"}\n\t"
+			: "=r"(result.x), "=r"(result.y) : "r"(a.x), "r"(a.y), "r"(offset));
+	}
+	else {
+		asm("{\n\t"
+			"shl.b32 %1,%2,%3; \n\t" // high = a.x << (offset - 32); shl.b32 gives 0 once the amount exceeds 31
+			"mov.b32 %0,0; \n\t" // every bit of the low word has been shifted out
+			"}\n\t"
+			: "=r"(result.x), "=r"(result.y) : "r"(a.x), "r"(offset - 32));
+	}
+	return result;
+}
+static __forceinline__ __device__ uint2 shiftr2(uint2 a, int offset) // Logical right shift of the 64-bit value {a.x = low, a.y = high} by offset bits (offset >= 0).
+{
+	uint2 result;
+	if (offset<32) {
+		asm("{\n\t"
+			"shf.r.clamp.b32 %0,%2,%3,%4; \n\t" // low = (a.y << (32 - offset)) | (a.x >> offset)
+			"shr.b32 %1,%3,%4; \n\t" // high = a.y >> offset
+			"}\n\t"
+			: "=r"(result.x), "=r"(result.y) : "r"(a.x), "r"(a.y), "r"(offset));
+	}
+	else {
+		asm("{\n\t"
+			"shr.b32 %0,%2,%3; \n\t" // low = a.y >> (offset - 32); shr.b32 gives 0 once the amount exceeds 31
+			"mov.b32 %1,0; \n\t" // every bit of the high word has been shifted out
+			"}\n\t"
+			: "=r"(result.x), "=r"(result.y) : "r"(a.y), "r"(offset - 32));
+	}
+	return result;
+}
+#else 
+static __forceinline__ __device__ uint2 shiftl2(uint2 a, int offset) // Fallback (__CUDA_ARCH__ < 350): logical left shift of {a.x = low, a.y = high} by offset bits via one 64-bit shl.
+{
+	uint2 result;
+	asm("{\n\t"
+		".reg .b64 u,v; \n\t"
+		"mov.b64 v,{%2,%3}; \n\t" // v = packed 64-bit input
+		"shl.b64 u,v,%4; \n\t" // u = v << offset
+		"mov.b64 {%0,%1},u;  \n\t" // unpack the shifted value u (not the input v)
+		"}\n\t"
+		: "=r"(result.x), "=r"(result.y) : "r"(a.x), "r"(a.y), "r"(offset));
+	return result;
+}
+static __forceinline__ __device__ uint2 shiftr2(uint2 a, int offset) // Fallback (__CUDA_ARCH__ < 350): logical right shift of {a.x = low, a.y = high} by offset bits via one 64-bit shr.
+{
+	uint2 result;
+	asm("{\n\t"
+		".reg .b64 u,v; \n\t"
+		"mov.b64 v,{%2,%3}; \n\t" // v = packed 64-bit input
+		"shr.b64 u,v,%4; \n\t" // u = v >> offset (untyped shift, zero fill)
+		"mov.b64 {%0,%1},u;  \n\t" // unpack the shifted value u (not the input v)
+		"}\n\t"
+		: "=r"(result.x), "=r"(result.y) : "r"(a.x), "r"(a.y), "r"(offset));
+	return result;
+}
+#endif
+///////////////////////////////////////////////////////////////////////////////////
+
+
 #endif // #ifndef CUDA_HELPER_H
diff --git a/cuda_vector.h b/cuda_vector.h
new file mode 100644
index 0000000000..0dad00de09
--- /dev/null
+++ b/cuda_vector.h
@@ -0,0 +1,256 @@
+#ifndef CUDA_VECTOR_H
+#define CUDA_VECTOR_H
+
+
+///////////////////////////////////////////////////////////////////////////////////
+#if (defined(_MSC_VER) && defined(_WIN64)) || defined(__LP64__)
+#define __LDG_PTR   "l"
+#else
+#define __LDG_PTR   "r"
+#endif
+#include "cuda_helper.h"
+
+//typedef __device_builtin__ struct ulong16 ulong16;
+// Vector of eight unsigned int lanes (s0..s7), aligned to 32 bytes.
+typedef struct __align__(32) uint8
+{
+	unsigned int s0, s1, s2, s3, s4, s5, s6, s7;
+} uint8;
+// Vector of sixteen unsigned int lanes, aligned to 64 bytes; anonymous unions overlay each group of eight lanes with a uint8.
+typedef struct __align__(64) uint16
+{	
+	union {
+		struct {unsigned int  s0, s1, s2, s3, s4, s5, s6, s7;}; 
+        uint8 lo; // shares storage with s0..s7
+           };
+	union {
+      struct {unsigned int s8, s9, sa, sb, sc, sd, se, sf;};
+       uint8 hi;}; // hi shares storage with s8..sf
+} uint16;
+
+
+
+// Constructors for uint16 / uint8. This overload builds a uint16 from sixteen scalars given in lane order s0..sf.
+static __inline__ __host__ __device__ uint16 make_uint16(
+	unsigned int s0, unsigned int s1, unsigned int s2, unsigned int s3, unsigned int s4, unsigned int s5, unsigned int s6, unsigned int s7,
+	unsigned int s8, unsigned int s9, unsigned int sa, unsigned int sb, unsigned int sc, unsigned int sd, unsigned int se, unsigned int sf)
+{
+	uint16 t; t.s0 = s0; t.s1 = s1; t.s2 = s2; t.s3 = s3; t.s4 = s4; t.s5 = s5; t.s6 = s6; t.s7 = s7;
+	t.s8 = s8; t.s9 = s9; t.sa = sa; t.sb = sb; t.sc = sc; t.sd = sd; t.se = se; t.sf = sf;
+	return t;
+}
+static __inline__ __host__ __device__ uint16 make_uint16(const uint8 &a, const uint8 &b)
+{ // builds a uint16 from two halves: a becomes lo (s0..s7), b becomes hi (s8..sf)
+uint16 t; t.lo=a; t.hi=b; return t;
+}
+static __inline__ __host__ __device__ uint8 make_uint8(
+	unsigned int s0, unsigned int s1, unsigned int s2, unsigned int s3, unsigned int s4, unsigned int s5, unsigned int s6, unsigned int s7)
+{ // builds a uint8 from eight scalars given in lane order s0..s7
+	uint8 t; t.s0 = s0; t.s1 = s1; t.s2 = s2; t.s3 = s3; t.s4 = s4; t.s5 = s5; t.s6 = s6; t.s7 = s7;
+	return t;
+}
+
+
+
+// Lane-wise XOR (operator^) and lane-wise addition (operator+) for the vector types; every lane is combined independently.
+static __forceinline__ __device__ uchar4 operator^ (uchar4 a, uchar4 b) { return make_uchar4(a.x ^ b.x, a.y ^ b.y, a.z ^ b.z, a.w ^ b.w); }
+static __forceinline__ __device__ uchar4 operator+ (uchar4 a, uchar4 b) { return make_uchar4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); }
+
+// uint4: four lanes x, y, z, w
+static __forceinline__ __device__ uint4 operator^ (uint4 a, uint4 b) { return make_uint4(a.x ^ b.x, a.y ^ b.y, a.z ^ b.z, a.w ^ b.w); }
+static __forceinline__ __device__ uint4 operator+ (uint4 a, uint4 b) { return make_uint4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); }
+
+// ulonglong4 / ulonglong2 (only XOR is provided for ulonglong2)
+static __forceinline__ __device__ ulonglong4 operator^ (ulonglong4 a, ulonglong4 b) { return make_ulonglong4(a.x ^ b.x, a.y ^ b.y, a.z ^ b.z, a.w ^ b.w); }
+static __forceinline__ __device__ ulonglong4 operator+ (ulonglong4 a, ulonglong4 b) { return make_ulonglong4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); }
+static __forceinline__ __device__ ulonglong2 operator^ (ulonglong2 a, ulonglong2 b) { return make_ulonglong2(a.x ^ b.x, a.y ^ b.y); }
+
+// uint8 / uint16: usable from host and device code
+static __forceinline__ __device__  __host__ uint8 operator^ (const uint8 &a, const uint8 &b) { return make_uint8(a.s0 ^ b.s0, a.s1 ^ b.s1, a.s2 ^ b.s2, a.s3 ^ b.s3, a.s4 ^ b.s4, a.s5 ^ b.s5, a.s6 ^ b.s6, a.s7 ^ b.s7); }
+
+static __forceinline__ __device__  __host__ uint8 operator+ (const uint8 &a, const uint8 &b) { return make_uint8(a.s0 + b.s0, a.s1 + b.s1, a.s2 + b.s2, a.s3 + b.s3, a.s4 + b.s4, a.s5 + b.s5, a.s6 + b.s6, a.s7 + b.s7); }
+
+static __forceinline__ __device__ __host__ uint16 operator^ (const uint16 &a, const uint16 &b) {
+	return make_uint16(a.s0 ^ b.s0, a.s1 ^ b.s1, a.s2 ^ b.s2, a.s3 ^ b.s3, a.s4 ^ b.s4, a.s5 ^ b.s5, a.s6 ^ b.s6, a.s7 ^ b.s7,
+		a.s8 ^ b.s8, a.s9 ^ b.s9, a.sa ^ b.sa, a.sb ^ b.sb, a.sc ^ b.sc, a.sd ^ b.sd, a.se ^ b.se, a.sf ^ b.sf);
+}
+
+
+
+static __forceinline__ __device__  __host__ uint16 operator+ (const uint16 &a, const uint16 &b) {
+	return make_uint16(a.s0 + b.s0, a.s1 + b.s1, a.s2 + b.s2, a.s3 + b.s3, a.s4 + b.s4, a.s5 + b.s5, a.s6 + b.s6, a.s7 + b.s7,
+		a.s8 + b.s8, a.s9 + b.s9, a.sa + b.sa, a.sb + b.sb, a.sc + b.sc, a.sd + b.sd, a.se + b.se, a.sf + b.sf);
+}
+
+
+// Compound assignment forms; each one forwards to the matching binary operator and stores the result in a.
+static __forceinline__ __device__ void operator^= (uint4 &a, uint4 b) { a = a ^ b; }
+static __forceinline__ __device__ void operator^= (uchar4 &a, uchar4 b) { a = a ^ b; }
+static __forceinline__ __device__  __host__ void operator^= (uint8 &a, const uint8 &b) { a = a ^ b; }
+static __forceinline__ __device__  __host__ void operator^= (uint16 &a, const uint16 &b) { a = a ^ b; }
+
+
+static __forceinline__ __device__ void operator^= (ulonglong4 &a, const ulonglong4 &b) { a = a ^ b; }
+static __forceinline__ __device__ void operator^= (ulonglong2 &a, const ulonglong2 &b) { a = a ^ b; }
+
+static __forceinline__ __device__ void operator+= (uint4 &a, uint4 b) { a = a + b; }
+static __forceinline__ __device__ void operator+= (uchar4 &a, uchar4 b) { a = a + b; }
+static __forceinline__ __device__  __host__ void operator+= (uint8 &a, const uint8 &b) { a = a + b; }
+static __forceinline__ __device__  __host__ void operator+= (uint16 &a, const uint16 &b) { a = a + b; }
+
+// Rotates the 32-bit value vec4 left by shift bits: a wrapping funnel shift (shf.l.wrap) with vec4 as both halves.
+static __forceinline__ __device__ uint32_t rotate(uint32_t vec4, uint32_t shift)
+{
+	uint32_t ret;
+	asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(ret) : "r"(vec4), "r"(vec4), "r"(shift));
+	return ret;
+}
+
+// Rotates the 32-bit value vec4 right by shift bits: a wrapping funnel shift (shf.r.wrap) with vec4 as both halves.
+static __forceinline__ __device__ uint32_t rotateR(uint32_t vec4, uint32_t shift)
+{
+	uint32_t ret;
+	asm("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(ret) : "r"(vec4), "r"(vec4), "r"(shift));
+	return ret;
+}
+
+// Loads 32 bytes starting at ptr into a uint8 with two ld.global.nc.v4.u32 loads (offsets 0 and 16).
+static __device__ __inline__ uint8 __ldg8(const uint8_t *ptr)
+{
+	// NOTE(review): vector loads presumably need ptr to be 16-byte aligned - confirm against callers
+	uint8 test;
+	asm volatile ("ld.global.nc.v4.u32 {%0,%1,%2,%3},[%4];" : "=r"(test.s0), "=r"(test.s1), "=r"(test.s2), "=r"(test.s3) : __LDG_PTR(ptr));
+	asm volatile ("ld.global.nc.v4.u32 {%0,%1,%2,%3},[%4+16];" : "=r"(test.s4), "=r"(test.s5), "=r"(test.s6), "=r"(test.s7) : __LDG_PTR(ptr));
+ 
+	return (test);
+}
+
+// Loads one 32-bit word at ptr with ld.global.nc.u32 and returns it.
+static __device__ __inline__ uint32_t __ldgtoint(const uint8_t *ptr)
+{
+	// NOTE(review): a u32 load presumably needs ptr to be 4-byte aligned; __ldgtoint_unaligned reads byte by byte instead
+	uint32_t test;
+	asm volatile ("ld.global.nc.u32 {%0},[%1];" : "=r"(test) : __LDG_PTR(ptr));
+	return (test);
+}
+// Loads one 64-bit word at ptr with ld.global.nc.u64 and returns all 64 bits.
+static __device__ __inline__ uint64_t __ldgtoint64(const uint8_t *ptr)
+{
+	// The return type is uint64_t so the upper half of the loaded word is no longer truncated away.
+	uint64_t test;
+	asm volatile ("ld.global.nc.u64 {%0},[%1];" : "=l"(test) : __LDG_PTR(ptr));
+	return (test);
+}
+
+// Reads four single bytes at ptr..ptr+3 with ld.global.nc.u8 and packs them into one 32-bit value (mov.b32 pack).
+static __device__ __inline__ uint32_t __ldgtoint_unaligned(const uint8_t *ptr)
+{
+	// Byte loads are used so that ptr does not have to be word aligned; {a,b,c,d} puts the byte at ptr in the lowest position.
+	uint32_t test;
+	asm volatile ("{\n\t"
+		".reg .u8 a,b,c,d; \n\t"		
+	"ld.global.nc.u8 a,[%1]; \n\t" 
+	"ld.global.nc.u8 b,[%1+1]; \n\t"
+	"ld.global.nc.u8 c,[%1+2]; \n\t"
+	"ld.global.nc.u8 d,[%1+3]; \n\t"
+    "mov.b32 %0,{a,b,c,d}; }\n\t"
+: "=r"(test) : __LDG_PTR(ptr));
+	return (test);
+}
+// Reads eight single bytes at ptr..ptr+7 and packs them into one 64-bit value: bytes 0-3 form the low word i, bytes 4-7 the high word j.
+static __device__ __inline__ uint64_t __ldgtoint64_unaligned(const uint8_t *ptr)
+{
+	uint64_t test;
+	asm volatile ("{\n\t"
+		".reg .u8 a,b,c,d,e,f,g,h; \n\t"
+		".reg .u32 i,j; \n\t"
+		"ld.global.nc.u8 a,[%1]; \n\t"
+		"ld.global.nc.u8 b,[%1+1]; \n\t"
+		"ld.global.nc.u8 c,[%1+2]; \n\t"
+		"ld.global.nc.u8 d,[%1+3]; \n\t"
+		"ld.global.nc.u8 e,[%1+4]; \n\t"
+		"ld.global.nc.u8 f,[%1+5]; \n\t"
+		"ld.global.nc.u8 g,[%1+6]; \n\t"
+		"ld.global.nc.u8 h,[%1+7]; \n\t"
+		 "mov.b32 i,{a,b,c,d}; \n\t"
+         "mov.b32 j,{e,f,g,h}; \n\t"
+		 "mov.b64 %0,{i,j}; }\n\t"
+		: "=l"(test) : __LDG_PTR(ptr));
+	return (test);
+}
+
+// Reads four single bytes at ptr..ptr+3, packs them into a 32-bit word and returns it zero-extended to 64 bits.
+static __device__ __inline__ uint64_t __ldgtoint64_trunc(const uint8_t *ptr)
+{
+	uint32_t zero = 0;	// supplies the upper 32 bits of the result (asm operand %2)
+
+	uint64_t test;
+	asm volatile ("{\n\t"
+		".reg .u8 a,b,c,d; \n\t"
+		".reg .u32 i; \n\t"
+		"ld.global.nc.u8 a,[%1]; \n\t"
+		"ld.global.nc.u8 b,[%1+1]; \n\t"
+		"ld.global.nc.u8 c,[%1+2]; \n\t"
+		"ld.global.nc.u8 d,[%1+3]; \n\t"
+		"mov.b32 i,{a,b,c,d}; \n\t"
+		"mov.b64 %0,{i,%2}; }\n\t"
+		: "=l"(test) : __LDG_PTR(ptr), "r"(zero));
+	return (test);
+}
+
+
+// Same byte-wise 32-bit read as __ldgtoint_unaligned, but the asm statement is not marked volatile.
+static __device__ __inline__ uint32_t __ldgtoint_unaligned2(const uint8_t *ptr)
+{
+	// four ld.global.nc.u8 loads at ptr..ptr+3, packed with the byte at ptr in the lowest position
+	uint32_t test;
+	asm("{\n\t"
+		".reg .u8 e,b,c,d; \n\t"
+		"ld.global.nc.u8 e,[%1]; \n\t"
+		"ld.global.nc.u8 b,[%1+1]; \n\t"
+		"ld.global.nc.u8 c,[%1+2]; \n\t"
+		"ld.global.nc.u8 d,[%1+3]; \n\t"
+		"mov.b32 %0,{e,b,c,d}; }\n\t"
+		: "=r"(test) : __LDG_PTR(ptr));
+	return (test);
+}
+
+
+
+// Returns a copy of *buf in which every lane has been passed through cuda_swab32.
+static __forceinline__ __device__ uint8 swapvec(const uint8 *buf)
+{
+	uint8 swapped;
+	swapped.s0 = cuda_swab32(buf->s0);
+	swapped.s1 = cuda_swab32(buf->s1);
+	swapped.s2 = cuda_swab32(buf->s2);
+	swapped.s3 = cuda_swab32(buf->s3);
+	swapped.s4 = cuda_swab32(buf->s4);
+	swapped.s5 = cuda_swab32(buf->s5);
+	swapped.s6 = cuda_swab32(buf->s6);
+	swapped.s7 = cuda_swab32(buf->s7);
+	return swapped;
+}
+// uint16 overload: returns a copy of *buf in which each of the sixteen lanes has been passed through cuda_swab32.
+static __forceinline__ __device__ uint16 swapvec(const uint16 *buf)
+{
+	uint16 swapped;
+	swapped.s0 = cuda_swab32(buf->s0);
+	swapped.s1 = cuda_swab32(buf->s1);
+	swapped.s2 = cuda_swab32(buf->s2);
+	swapped.s3 = cuda_swab32(buf->s3);
+	swapped.s4 = cuda_swab32(buf->s4);
+	swapped.s5 = cuda_swab32(buf->s5);
+	swapped.s6 = cuda_swab32(buf->s6);
+	swapped.s7 = cuda_swab32(buf->s7);
+	swapped.s8 = cuda_swab32(buf->s8);
+	swapped.s9 = cuda_swab32(buf->s9);
+	swapped.sa = cuda_swab32(buf->sa);
+	swapped.sb = cuda_swab32(buf->sb);
+	swapped.sc = cuda_swab32(buf->sc);
+	swapped.sd = cuda_swab32(buf->sd);
+	swapped.se = cuda_swab32(buf->se);
+	swapped.sf = cuda_swab32(buf->sf);
+	return swapped;
+}
+#endif // #ifndef CUDA_VECTOR_H
diff --git a/heavy/heavy.cu b/heavy/heavy.cu
index 98728dc222..37b98e4f47 100644
--- a/heavy/heavy.cu
+++ b/heavy/heavy.cu
@@ -167,6 +167,9 @@ extern "C" int cuda_num_devices()
 // Ger�tenamen holen
 extern char *device_name[8];
 extern int device_map[8];
+int device_major[8]; // props.major of each device slot, filled in by cuda_devicenames()
+int device_minor[8]; // props.minor of each device slot, filled in by cuda_devicenames()
+int compute_version[8]; // major*10 + minor of each device slot, filled in by cuda_devicenames()
 
 extern "C" void cuda_devicenames()
 {
@@ -185,6 +188,9 @@ extern "C" void cuda_devicenames()
         cudaGetDeviceProperties(&props, device_map[i]);
 
         device_name[i] = strdup(props.name);
+		device_major[i] = props.major; 
+		device_minor[i] = props.minor;
+		compute_version[i]= props.major*10+props.minor;
     }
 }
 
diff --git a/lyra2/cuda_lyra2.cu b/lyra2/cuda_lyra2.cu
new file mode 100644
index 0000000000..2c8327f592
--- /dev/null
+++ b/lyra2/cuda_lyra2.cu
@@ -0,0 +1,451 @@
+/*
+ * lyra2 kernel implementation.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2014  djm34
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author   djm34
+ */
+#include <cuda.h>
+#include "cuda_runtime.h"
+#include "device_launch_parameters.h"
+
+
+#include <stdio.h>
+#include <stdint.h>
+#include <memory.h>
+
+
+extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
+extern int compute_version[8];
+
+#include "cuda_helper.h"
+
+// Eight 64-bit constants stored as uint2 pairs: the first initializer is .x (low 32 bits), the second is .y (high 32 bits).
+static __constant__ uint2 blake2b_IV[8] =
+{
+	{ 0xf3bcc908, 0x6a09e667  }, 
+	{ 0x84caa73b, 0xbb67ae85  },
+	{ 0xfe94f82b, 0x3c6ef372  },
+	{ 0x5f1d36f1, 0xa54ff53a  },
+	{ 0xade682d1, 0x510e527f  },
+	{ 0x2b3e6c1f, 0x9b05688c  },
+	{ 0xfb41bd6b, 0x1f83d9ab  },
+	{ 0x137e2179, 0x5be0cd19  }
+};
+// Needs `state` and `Matrix` in scope. Per 12-word block i: absorb rowIn+rowInOut, one round, write rowIn^state to block 7-i of rowOut, XOR the state rotated by one word into rowInOut.
+#define reduceDuplexRowSetup(rowIn, rowInOut, rowOut) \
+  { \
+	for (int i = 0; i < 8; i++) \
+			{ \
+\
+		for (int j = 0; j < 12; j++) {state[j] ^= Matrix[12 * i + j][rowIn] + Matrix[12 * i + j][rowInOut];} \
+		round_lyra_v35(state); \
+		for (int j = 0; j < 12; j++) {Matrix[j + 84 - 12 * i][rowOut] = Matrix[12 * i + j][rowIn] ^ state[j];} \
+\
+		Matrix[0 + 12 * i][rowInOut] ^= state[11]; \
+		Matrix[1 + 12 * i][rowInOut] ^= state[0]; \
+		Matrix[2 + 12 * i][rowInOut] ^= state[1]; \
+		Matrix[3 + 12 * i][rowInOut] ^= state[2]; \
+		Matrix[4 + 12 * i][rowInOut] ^= state[3]; \
+		Matrix[5 + 12 * i][rowInOut] ^= state[4]; \
+		Matrix[6 + 12 * i][rowInOut] ^= state[5]; \
+		Matrix[7 + 12 * i][rowInOut] ^= state[6]; \
+		Matrix[8 + 12 * i][rowInOut] ^= state[7]; \
+		Matrix[9 + 12 * i][rowInOut] ^= state[8]; \
+		Matrix[10 + 12 * i][rowInOut] ^= state[9]; \
+		Matrix[11 + 12 * i][rowInOut] ^= state[10]; \
+			} \
+ \
+  } 
+// Needs `state` and `Matrix` in scope. Per 12-word block i: absorb rowIn+rowInOut, one round, XOR the state into the same block of rowOut, XOR the state rotated by one word into rowInOut.
+#define reduceDuplexRow(rowIn, rowInOut, rowOut) \
+  { \
+	 for (int i = 0; i < 8; i++) \
+	 	 	 	 { \
+		 for (int j = 0; j < 12; j++) \
+			 state[j] ^= Matrix[12 * i + j][rowIn] + Matrix[12 * i + j][rowInOut]; \
+ \
+		 round_lyra_v35(state); \
+		 for (int j = 0; j < 12; j++) {Matrix[j + 12 * i][rowOut] ^= state[j];} \
+\
+		 Matrix[0 + 12 * i][rowInOut] ^= state[11]; \
+		 Matrix[1 + 12 * i][rowInOut] ^= state[0]; \
+		 Matrix[2 + 12 * i][rowInOut] ^= state[1]; \
+		 Matrix[3 + 12 * i][rowInOut] ^= state[2]; \
+		 Matrix[4 + 12 * i][rowInOut] ^= state[3]; \
+		 Matrix[5 + 12 * i][rowInOut] ^= state[4]; \
+		 Matrix[6 + 12 * i][rowInOut] ^= state[5]; \
+		 Matrix[7 + 12 * i][rowInOut] ^= state[6]; \
+		 Matrix[8 + 12 * i][rowInOut] ^= state[7]; \
+		 Matrix[9 + 12 * i][rowInOut] ^= state[8]; \
+		 Matrix[10 + 12 * i][rowInOut] ^= state[9]; \
+		 Matrix[11 + 12 * i][rowInOut] ^= state[10]; \
+	 	 	 	 } \
+ \
+  } 
+#define absorbblock(in)  { /* XOR words 0..11 of Matrix row `in` into state, then run round_lyra_v35 twelve times */ \
+	state[0] ^= Matrix[0][in]; \
+	state[1] ^= Matrix[1][in]; \
+	state[2] ^= Matrix[2][in]; \
+	state[3] ^= Matrix[3][in]; \
+	state[4] ^= Matrix[4][in]; \
+	state[5] ^= Matrix[5][in]; \
+	state[6] ^= Matrix[6][in]; \
+	state[7] ^= Matrix[7][in]; \
+	state[8] ^= Matrix[8][in]; \
+	state[9] ^= Matrix[9][in]; \
+	state[10] ^= Matrix[10][in]; \
+	state[11] ^= Matrix[11][in]; \
+	round_lyra_v35(state); \
+	round_lyra_v35(state); \
+	round_lyra_v35(state); \
+	round_lyra_v35(state); \
+	round_lyra_v35(state); \
+	round_lyra_v35(state); \
+	round_lyra_v35(state); \
+	round_lyra_v35(state); \
+	round_lyra_v35(state); \
+	round_lyra_v35(state); \
+	round_lyra_v35(state); \
+	round_lyra_v35(state); \
+  } 
+// Same steps as reduceDuplexRowSetup, but calling round_lyra_v30 (uint64_t state) instead of round_lyra_v35.
+//// compute 30 version 
+#define reduceDuplexRowSetup_v30(rowIn, rowInOut, rowOut) \
+  { \
+	for (int i = 0; i < 8; i++) \
+				{ \
+\
+		for (int j = 0; j < 12; j++) {state[j] ^= Matrix[12 * i + j][rowIn] + Matrix[12 * i + j][rowInOut];} \
+		round_lyra_v30(state); \
+		for (int j = 0; j < 12; j++) {Matrix[j + 84 - 12 * i][rowOut] = Matrix[12 * i + j][rowIn] ^ state[j];} \
+\
+		Matrix[0 + 12 * i][rowInOut] ^= state[11]; \
+		Matrix[1 + 12 * i][rowInOut] ^= state[0]; \
+		Matrix[2 + 12 * i][rowInOut] ^= state[1]; \
+		Matrix[3 + 12 * i][rowInOut] ^= state[2]; \
+		Matrix[4 + 12 * i][rowInOut] ^= state[3]; \
+		Matrix[5 + 12 * i][rowInOut] ^= state[4]; \
+		Matrix[6 + 12 * i][rowInOut] ^= state[5]; \
+		Matrix[7 + 12 * i][rowInOut] ^= state[6]; \
+		Matrix[8 + 12 * i][rowInOut] ^= state[7]; \
+		Matrix[9 + 12 * i][rowInOut] ^= state[8]; \
+		Matrix[10 + 12 * i][rowInOut] ^= state[9]; \
+		Matrix[11 + 12 * i][rowInOut] ^= state[10]; \
+				} \
+ \
+  } 
+// Same steps as reduceDuplexRow, but calling round_lyra_v30 (uint64_t state) instead of round_lyra_v35.
+#define reduceDuplexRow_v30(rowIn, rowInOut, rowOut) \
+  { \
+	 for (int i = 0; i < 8; i++) \
+	 	 	 	 	 { \
+		 for (int j = 0; j < 12; j++) \
+			 state[j] ^= Matrix[12 * i + j][rowIn] + Matrix[12 * i + j][rowInOut]; \
+ \
+		 round_lyra_v30(state); \
+		 for (int j = 0; j < 12; j++) {Matrix[j + 12 * i][rowOut] ^= state[j];} \
+\
+		 Matrix[0 + 12 * i][rowInOut] ^= state[11]; \
+		 Matrix[1 + 12 * i][rowInOut] ^= state[0]; \
+		 Matrix[2 + 12 * i][rowInOut] ^= state[1]; \
+		 Matrix[3 + 12 * i][rowInOut] ^= state[2]; \
+		 Matrix[4 + 12 * i][rowInOut] ^= state[3]; \
+		 Matrix[5 + 12 * i][rowInOut] ^= state[4]; \
+		 Matrix[6 + 12 * i][rowInOut] ^= state[5]; \
+		 Matrix[7 + 12 * i][rowInOut] ^= state[6]; \
+		 Matrix[8 + 12 * i][rowInOut] ^= state[7]; \
+		 Matrix[9 + 12 * i][rowInOut] ^= state[8]; \
+		 Matrix[10 + 12 * i][rowInOut] ^= state[9]; \
+		 Matrix[11 + 12 * i][rowInOut] ^= state[10]; \
+	 	 	 	 	 } \
+ \
+  } 
+#define absorbblock_v30(in)  { /* XOR words 0..11 of Matrix row `in` into state, then run round_lyra_v30 twelve times */ \
+	state[0] ^= Matrix[0][in]; \
+	state[1] ^= Matrix[1][in]; \
+	state[2] ^= Matrix[2][in]; \
+	state[3] ^= Matrix[3][in]; \
+	state[4] ^= Matrix[4][in]; \
+	state[5] ^= Matrix[5][in]; \
+	state[6] ^= Matrix[6][in]; \
+	state[7] ^= Matrix[7][in]; \
+	state[8] ^= Matrix[8][in]; \
+	state[9] ^= Matrix[9][in]; \
+	state[10] ^= Matrix[10][in]; \
+	state[11] ^= Matrix[11][in]; \
+	round_lyra_v30(state); \
+	round_lyra_v30(state); \
+	round_lyra_v30(state); \
+	round_lyra_v30(state); \
+	round_lyra_v30(state); \
+	round_lyra_v30(state); \
+	round_lyra_v30(state); \
+	round_lyra_v30(state); \
+	round_lyra_v30(state); \
+	round_lyra_v30(state); \
+	round_lyra_v30(state); \
+	round_lyra_v30(state); \
+  } 
+
+
+
+// Mixing step on four uint2 words, updated in place: add / xor / rotate right by 32, 24, 16 and 63 (via ROR2).
+ static __device__ __forceinline__ void Gfunc_v35(uint2 & a, uint2 &b, uint2 &c, uint2 &d)
+ {
+	 a += b; d ^= a; d = ROR2(d, 32);
+	 c += d; b ^= c; b = ROR2(b, 24);
+	 a += b; d ^= a; d = ROR2(d, 16);
+	 c += d; b ^= c; b = ROR2(b, 63);
+ }
+
+// Same mixing step as Gfunc_v35 on plain uint64_t words, with ROTR64 for the right rotations.
+ static __device__ __forceinline__ void Gfunc_v30(uint64_t & a, uint64_t &b, uint64_t &c, uint64_t &d)
+ {
+	 a += b; d ^= a; d = ROTR64(d, 32);
+	 c += d; b ^= c; b = ROTR64(b, 24);
+	 a += b; d ^= a; d = ROTR64(d, 16);
+	 c += d; b ^= c; b = ROTR64(b, 63);
+ }
+
+// One round over the 16-word state s viewed as a 4x4 grid: Gfunc_v35 on the four columns, then on the four diagonals.
+static __device__ __forceinline__ void round_lyra_v35(uint2 *s) 
+{
+	Gfunc_v35(s[0], s[4], s[8],  s[12]);
+	Gfunc_v35(s[1], s[5], s[9],  s[13]);
+	Gfunc_v35(s[2], s[6], s[10], s[14]);
+	Gfunc_v35(s[3], s[7], s[11], s[15]);
+	Gfunc_v35(s[0], s[5], s[10], s[15]);
+	Gfunc_v35(s[1], s[6], s[11], s[12]);
+	Gfunc_v35(s[2], s[7], s[8],  s[13]);
+	Gfunc_v35(s[3], s[4], s[9],  s[14]);
+}
+// uint64_t variant of round_lyra_v35: Gfunc_v30 on the four columns, then on the four diagonals of the 16-word state s.
+static __device__ __forceinline__ void round_lyra_v30(uint64_t *s)
+{
+	Gfunc_v30(s[0], s[4], s[8], s[12]);
+	Gfunc_v30(s[1], s[5], s[9], s[13]);
+	Gfunc_v30(s[2], s[6], s[10], s[14]);
+	Gfunc_v30(s[3], s[7], s[11], s[15]);
+	Gfunc_v30(s[0], s[5], s[10], s[15]);
+	Gfunc_v30(s[1], s[6], s[11], s[12]);
+	Gfunc_v30(s[2], s[7], s[8], s[13]);
+	Gfunc_v30(s[3], s[4], s[9], s[14]);
+}
+
+
+// Lyra2 kernel on uint64_t words (launched by lyra2_cpu_hash_32 when compute_version < 35). One thread per hash: reads and overwrites the 4 words outputHash[threads*i + thread]; startNounce is unused.
+__global__ void __launch_bounds__(160, 1) lyra2_gpu_hash_32_v30(int threads, uint32_t startNounce, uint64_t *outputHash)
+{
+
+	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	if (thread < threads)
+	{
+		uint64_t state[16];
+#pragma unroll
+		for (int i = 0; i<4; i++) { state[i] = outputHash[threads*i + thread]; } //password
+#pragma unroll
+		for (int i = 0; i<4; i++) { state[i + 4] = state[i]; } //salt 
+#pragma unroll
+		for (int i = 0; i<8; i++) { state[i + 8] = devectorize(blake2b_IV[i]); }
+
+		//     blake2blyra x2 
+#pragma unroll 24
+		for (int i = 0; i<24; i++) { round_lyra_v30(state); } //because 12 is not enough
+
+		uint64_t Matrix[96][8]; // not cool
+		/// reducedSqueezeRow0
+#pragma unroll 8 
+		for (int i = 0; i < 8; i++)
+		{
+int idx = 84-12*i;
+#pragma unroll 12
+			for (int j = 0; j<12; j++) { Matrix[j + idx][0] = state[j]; }
+			round_lyra_v30(state);
+		}
+
+		/// reducedSqueezeRow1
+#pragma unroll 8 
+		for (int i = 0; i < 8; i++)
+		{
+int idx0= 12*i;
+int idx1= 84-idx0; 
+#pragma unroll 12
+			for (int j = 0; j<12; j++) { state[j] ^= Matrix[j + idx0][0]; }
+			round_lyra_v30(state);
+#pragma unroll 12  
+			for (int j = 0; j<12; j++) { Matrix[j + idx1][1] = Matrix[j + idx0][0] ^ state[j]; }
+		}
+
+		// fill rows 2..7 of Matrix (the third macro argument is the row being written)
+		reduceDuplexRowSetup_v30(1, 0, 2);
+		reduceDuplexRowSetup_v30(2, 1, 3);
+		reduceDuplexRowSetup_v30(3, 0, 4);
+		reduceDuplexRowSetup_v30(4, 3, 5);
+		reduceDuplexRowSetup_v30(5, 2, 6);
+		reduceDuplexRowSetup_v30(6, 1, 7);
+
+
+		// eight duplex passes; before each one the second row (rowa) is picked from the low 3 bits of state[0]
+		uint64_t rowa;
+		rowa = state[0] & 7;
+		reduceDuplexRow_v30(7, rowa, 0);
+		rowa = state[0] & 7;
+		reduceDuplexRow_v30(0, rowa, 3);
+		rowa = state[0] & 7;
+		reduceDuplexRow_v30(3, rowa, 6);
+		rowa = state[0] & 7;
+		reduceDuplexRow_v30(6, rowa, 1);
+		rowa = state[0] & 7;
+		reduceDuplexRow_v30(1, rowa, 4);
+		rowa = state[0] & 7;
+		reduceDuplexRow_v30(4, rowa, 7);
+		rowa = state[0] & 7;
+		reduceDuplexRow_v30(7, rowa, 2);
+		rowa = state[0] & 7;
+		reduceDuplexRow_v30(2, rowa, 5);
+
+		absorbblock_v30(rowa);
+
+		// write the first four state words back over this thread's input
+#pragma unroll
+		for (int i = 0; i<4; i++) {
+			outputHash[threads*i + thread] = state[i];
+		} //password
+
+
+	} //thread
+}
+
+// Lyra2 kernel on uint2 words (launched by lyra2_cpu_hash_32 when compute_version >= 35). One thread per hash: reads and overwrites the 4 words outputHash[threads*i + thread]; startNounce is unused.
+__global__ void __launch_bounds__(160, 1) lyra2_gpu_hash_32(int threads, uint32_t startNounce, uint64_t *outputHash)
+{
+
+	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	if (thread < threads)
+	{
+		uint2 state[16];
+#pragma unroll
+		for (int i = 0; i<4; i++) { LOHI(state[i].x, state[i].y, outputHash[threads*i + thread]); } //password
+#pragma unroll
+		for (int i = 0; i<4; i++) { state[i + 4] = state[i]; } //salt 
+#pragma unroll
+		for (int i = 0; i<8; i++) { state[i + 8] = blake2b_IV[i]; }
+
+		//     blake2blyra x2 
+#pragma unroll 24
+		for (int i = 0; i<24; i++) { round_lyra_v35(state); } //because 12 is not enough
+
+		uint2 Matrix[96][8]; // not cool
+
+		/// reducedSqueezeRow0
+#pragma unroll 8 
+		for (int i = 0; i < 8; i++)
+		{
+#pragma unroll 12
+			for (int j = 0; j<12; j++) { Matrix[j + 84 - 12 * i][0] = state[j]; }
+			round_lyra_v35(state);
+		}
+
+		/// reducedSqueezeRow1
+#pragma unroll 8 
+		for (int i = 0; i < 8; i++)
+		{
+#pragma unroll 12
+			for (int j = 0; j<12; j++) { state[j] ^= Matrix[j + 12 * i][0]; }
+			round_lyra_v35(state);
+#pragma unroll 12  
+			for (int j = 0; j<12; j++) { Matrix[j + 84 - 12 * i][1] = Matrix[j + 12 * i][0] ^ state[j]; }
+		}
+		// fill rows 2..7 of Matrix (the third macro argument is the row being written)
+		reduceDuplexRowSetup(1, 0, 2);
+		reduceDuplexRowSetup(2, 1, 3);
+		reduceDuplexRowSetup(3, 0, 4);
+		reduceDuplexRowSetup(4, 3, 5);
+		reduceDuplexRowSetup(5, 2, 6);
+		reduceDuplexRowSetup(6, 1, 7);
+
+
+		// eight duplex passes; before each one the second row (rowa) is picked from the low 3 bits of state[0].x
+		uint32_t rowa;
+		rowa = state[0].x & 7;
+		reduceDuplexRow(7, rowa, 0);
+		rowa = state[0].x & 7;
+		reduceDuplexRow(0, rowa, 3);
+		rowa = state[0].x & 7;
+		reduceDuplexRow(3, rowa, 6);
+		rowa = state[0].x & 7;
+		reduceDuplexRow(6, rowa, 1);
+		rowa = state[0].x & 7;
+		reduceDuplexRow(1, rowa, 4);
+		rowa = state[0].x & 7;
+		reduceDuplexRow(4, rowa, 7);
+		rowa = state[0].x & 7;
+		reduceDuplexRow(7, rowa, 2);
+		rowa = state[0].x & 7;
+		reduceDuplexRow(2, rowa, 5);
+
+		absorbblock(rowa);
+
+		// write the first four state words back over this thread's input
+#pragma unroll
+		for (int i = 0; i<4; i++) {
+			outputHash[threads*i + thread] = devectorize(state[i]);
+		} //password
+
+
+	} //thread
+}
+
+// Per-thread init hook; intentionally empty - scanhash_lyra calls it alongside the other *_cpu_init functions.
+void lyra2_cpu_init(int thr_id, int threads)
+{
+//not used    	
+} 
+
+// Host launcher: one thread per hash in blocks of 160 threads; picks the uint2 kernel for compute_version >= 35 and the uint64_t kernel otherwise.
+__host__ void lyra2_cpu_hash_32(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order)
+{
+	// same value as the __launch_bounds__(160, 1) declared on both kernels
+	const int threadsperblock = 160;
+
+	// compute how many thread blocks we need
+	dim3 grid((threads + threadsperblock - 1) / threadsperblock);
+	dim3 block(threadsperblock);
+
+	size_t shared_size = 0;
+	
+	if (compute_version[thr_id]>=35) {
+	lyra2_gpu_hash_32 << <grid, block, shared_size >> >(threads, startNounce, d_outputHash);
+	}
+	else {  // kernel for compute30 card
+	lyra2_gpu_hash_32_v30 << <grid, block, shared_size >> >(threads, startNounce, d_outputHash);
+	}
+	// NOTE(review): the launch result is not checked and the value returned by MyStreamSynchronize is ignored
+	MyStreamSynchronize(NULL, order, thr_id);
+
+}
+
diff --git a/lyra2/lyra2RE.cu b/lyra2/lyra2RE.cu
new file mode 100644
index 0000000000..440c2d197c
--- /dev/null
+++ b/lyra2/lyra2RE.cu
@@ -0,0 +1,160 @@
+
+extern "C"
+{
+#include "sph/sph_blake.h"
+#include "sph/sph_groestl.h"
+#include "sph/sph_skein.h"
+#include "sph/sph_keccak.h"
+#include "sph/Lyra2.h"
+
+#include "miner.h"
+}
+
+#include <stdint.h>
+
+// from cpu-miner.c
+extern int device_map[8];
+
+// memory for the input/output of the chained hash functions
+static uint64_t *d_hash[8];
+
+
+
+extern void quark_check_cpu_init(int thr_id, int threads);
+extern void quark_check_cpu_setTarget(const void *ptarget);
+extern uint32_t quark_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
+extern uint32_t quark_check_cpu_hash_64_2(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint64_t *d_inputHash, int order);
+
+
+extern void blake256_cpu_init(int thr_id, int threads);
+extern void blake256_cpu_hash_80(const int thr_id, const uint32_t threads, const uint32_t startNonce, uint64_t *Hash, int order);
+extern void blake256_cpu_setBlock_80(uint32_t *pdata);
+extern void keccak256_cpu_hash_32(int thr_id, int threads, uint32_t startNonce, uint64_t *d_outputHash, int order);
+extern void keccak256_cpu_init(int thr_id, int threads);
+extern void skein256_cpu_hash_32(int thr_id, int threads, uint32_t startNonce, uint64_t *d_outputHash, int order);
+extern void skein256_cpu_init(int thr_id, int threads);
+
+extern void lyra2_cpu_hash_32(int thr_id, int threads, uint32_t startNonce, uint64_t *d_outputHash, int order);
+extern void lyra2_cpu_init(int thr_id, int threads);
+
+extern void groestl256_setTarget(const void *ptarget);
+extern uint32_t groestl256_cpu_hash_32(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order);
+extern void groestl256_cpu_init(int thr_id, int threads);
+extern uint32_t groestl256_cpu64_hash_32(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order);
+extern void groestl256_cpu64_init(int thr_id, int threads);
+
+// CPU version of the hash chain: reads 80 bytes from input and writes a 32-byte digest to state.
+// The stages run in the same order as the GPU calls in scanhash_lyra.
+inline void lyra_hash(void *state, const void *input)
+{
+    // blake256 -> keccak256 -> LYRA2 -> skein256 -> groestl256
+	sph_blake256_context     ctx_blake;
+	sph_groestl256_context   ctx_groestl;
+	sph_keccak256_context    ctx_keccak;
+	sph_skein256_context     ctx_skein;
+
+	uint32_t hashA[8], hashB[8], hash[8];
+	uint32_t * data = (uint32_t*)input;
+//	for (int i = 0; i<10; i++)	{ printf("cpu data %d %08x %08x\n", i, data[2*i],data[2*i+1]); }
+	sph_blake256_init(&ctx_blake);
+	sph_blake256(&ctx_blake, input, 80);
+	sph_blake256_close(&ctx_blake, hashA);
+	
+	sph_keccak256_init(&ctx_keccak);
+	sph_keccak256(&ctx_keccak, hashA, 32);
+	sph_keccak256_close(&ctx_keccak, hashB);
+	// presumably (out, outlen, pwd, pwdlen, salt, saltlen, timeCost, nRows, nCols) - confirm against sph/Lyra2.h
+	LYRA2(hashA, 32, hashB, 32, hashB, 32, 1, 8, 8);
+
+	sph_skein256_init(&ctx_skein);
+	sph_skein256(&ctx_skein, hashA, 32);
+	sph_skein256_close(&ctx_skein, hashB);
+
+	sph_groestl256_init(&ctx_groestl);
+	sph_groestl256(&ctx_groestl, hashB, 32);
+	sph_groestl256_close(&ctx_groestl, hash);
+//for (int i = 0; i<4; i++)	{ printf("cpu groestl %d %08x %08x\n", i, hash[2 * i], hash[2 * i + 1]); }
+    memcpy(state, hash, 32);
+}
+
+extern float tp_coef[8];
+extern bool opt_benchmark;
+// Scans nonces from pdata[19] in batches of `throughput` on the GPU; returns 1 with pdata[19] set to the nonce the GPU reported, or 0 once the range is used up or a restart is requested.
+extern "C" int scanhash_lyra(int thr_id, uint32_t *pdata,
+    const uint32_t *ptarget, uint32_t max_nonce,
+    unsigned long *hashes_done)
+{
+	const uint32_t first_nonce = pdata[19];
+
+	if (opt_benchmark)
+		((uint32_t*)ptarget)[7] = 0x0000ff;
+
+	const uint32_t Htarg = ptarget[7];
+	if (tp_coef[thr_id]<0) { tp_coef[thr_id] = 4.; }
+	const int throughput = (int) (256*256*tp_coef[thr_id]);
+
+	static bool init[8] = {0,0,0,0,0,0,0,0};
+	if (!init[thr_id])
+	{
+		cudaSetDevice(device_map[thr_id]); 
+		cudaDeviceReset();
+		cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
+		cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
+		// copy constants, allocate memory (32 bytes of hash state per nonce in a batch)
+		cudaMalloc(&d_hash[thr_id], 8 * sizeof(uint32_t) * throughput);
+		blake256_cpu_init(thr_id, throughput);
+		keccak256_cpu_init(thr_id,throughput);
+		skein256_cpu_init(thr_id, throughput);
+		lyra2_cpu_init(thr_id, throughput);
+		groestl256_cpu_init(thr_id, throughput);
+		init[thr_id] = true;
+	}
+
+	uint32_t endiandata[20];
+	for (int k=0; k < 20; k++)
+		be32enc(&endiandata[k], ((uint32_t*)pdata)[k]); 
+	blake256_cpu_setBlock_80(pdata);
+	groestl256_setTarget(ptarget); 
+
+	do {
+		int order = 0;
+
+		// first hash of the chain (Blake-256 over the 80-byte header) with CUDA
+		blake256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
+
+		keccak256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
+
+		lyra2_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
+
+
+		skein256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
+
+		// scan for winning hashes on the GPU (0xffffffff means none was found)
+ uint32_t	foundNonce = groestl256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
+//foundNonce = pdata[19]+10;
+		if  (foundNonce != 0xffffffff)
+		{
+			uint32_t vhash64[8];
+			be32enc(&endiandata[19], foundNonce);
+			//pdata[19]=foundNonce;
+//			lyra_hash(vhash64, endiandata);
+
+//			if ( ((uint64_t*)vhash64)[3] <= ((uint64_t*)ptarget)[3]) { // && fulltest(vhash64, ptarget)) {
+//				printf("target %08x %08x %08x %08x\n", ptarget[0], ptarget[1], ptarget[2], ptarget[3]);
+//				printf("target %08x %08x %08x %08x\n", ptarget[4], ptarget[5], ptarget[6], ptarget[7]);
+
+				pdata[19] = foundNonce;
+				*hashes_done = foundNonce - first_nonce + 1;
+				return 1;
+//			} else {
+//				applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNonce);
+//			}
+		}
+		// Stop at the top of the 32-bit nonce space: a wrapped pdata[19] would compare below max_nonce and keep the loop scanning.
+		if ((uint64_t)pdata[19] + (uint64_t)throughput > 0xffffffffULL) { pdata[19] = 0xffffffffU; break; }
+		pdata[19] += throughput;
+	} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
+
+	*hashes_done = pdata[19] - first_nonce + 1;
+	return 0;
+}
diff --git a/miner.h b/miner.h
index 0e205188fa..75f124c0f3 100644
--- a/miner.h
+++ b/miner.h
@@ -168,6 +168,59 @@ static inline void le16enc(void *pp, uint16_t x)
 }
 #endif
 
+#if !HAVE_DECL_BE64DEC
+static inline uint64_t be64dec(const void *pp)
+{
+	const uint8_t *src = (const uint8_t *)pp;	/* 8 bytes, most significant byte first */
+	uint64_t value = 0;
+	int k;
+	for (k = 0; k < 8; k++) value = (value << 8) | src[k];
+	return value;
+}
+#endif
+
+#if !HAVE_DECL_LE64DEC
+static inline uint64_t le64dec(const void *pp)
+{
+	const uint8_t *src = (const uint8_t *)pp;	/* 8 bytes, least significant byte first */
+	uint64_t value = 0;
+	int k;
+	for (k = 7; k >= 0; k--) value = (value << 8) | src[k];
+	return value;
+}
+#endif
+
+#if !HAVE_DECL_BE64ENC
+static inline void be64enc(void *pp, uint64_t x)
+{
+	/* Store x at pp as 8 bytes, most significant byte first. */
+	uint8_t *dst = (uint8_t *)pp;
+	int k;
+
+	for (k = 7; k >= 0; k--) {
+		/* dst[7] takes the lowest byte of x, dst[0] the highest */
+		dst[k] = x & 0xff;
+		x >>= 8;
+	}
+}
+#endif
+
+#if !HAVE_DECL_LE64ENC
+static inline void le64enc(void *pp, uint64_t x)
+{
+	/* Store x at pp as 8 bytes, least significant byte first. */
+	uint8_t *dst = (uint8_t *)pp;
+	int k;
+
+	for (k = 0; k < 8; k++) {
+		/* dst[0] takes the lowest byte of x, dst[7] the highest */
+		dst[k] = x & 0xff;
+		x >>= 8;
+	}
+}
+#endif
+
+
 #if JANSSON_MAJOR_VERSION >= 2
 #define JSON_LOADS(str, err_ptr) json_loads((str), 0, (err_ptr))
 #else
@@ -231,10 +284,18 @@ extern int scanhash_anime(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
 	unsigned long *hashes_done);
 
+extern int scanhash_qubit(int thr_id, uint32_t *pdata,
+	const uint32_t *ptarget, uint32_t max_nonce,
+	unsigned long *hashes_done);
+
 extern int scanhash_nist5(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
 	unsigned long *hashes_done);
 
+extern int scanhash_fresh(int thr_id, uint32_t *pdata,
+	const uint32_t *ptarget, uint32_t max_nonce,
+	unsigned long *hashes_done);
+
 extern int scanhash_x11(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
 	unsigned long *hashes_done);
@@ -243,6 +304,55 @@ extern int scanhash_x13(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
 	unsigned long *hashes_done);
 
+extern int scanhash_x14(int thr_id, uint32_t *pdata,
+	const uint32_t *ptarget, uint32_t max_nonce,
+	unsigned long *hashes_done);
+
+extern int scanhash_x15(int thr_id, uint32_t *pdata,
+	const uint32_t *ptarget, uint32_t max_nonce,
+	unsigned long *hashes_done);
+
+extern int scanhash_x17(int thr_id, uint32_t *pdata,
+	const uint32_t *ptarget, uint32_t max_nonce,
+	unsigned long *hashes_done);
+
+extern int scanhash_goal(int thr_id, uint32_t *pdata,
+	const uint32_t *ptarget, uint32_t max_nonce,
+	unsigned long *hashes_done);
+
+extern int scanhash_m7(int thr_id, uint32_t *pdata,
+	const uint32_t *ptarget, uint32_t max_nonce,
+	unsigned long  *hashes_done);
+
+extern int scanhash_deep(int thr_id, uint32_t *pdata,
+	const uint32_t *ptarget, uint32_t max_nonce,
+	unsigned long *hashes_done);
+
+extern int scanhash_lyra(int thr_id, uint32_t *pdata,
+	const uint32_t *ptarget, uint32_t max_nonce,
+	unsigned long *hashes_done);
+
+extern int scanhash_neoscrypt(bool stratum,int thr_id, uint32_t *pdata,
+	const uint32_t *ptarget, uint32_t max_nonce,
+	unsigned long *hashes_done);
+
+extern int scanhash_pluck(int thr_id, uint32_t *pdata,
+	const uint32_t *ptarget, uint32_t max_nonce,
+	unsigned long *hashes_done);
+
+
+extern int scanhash_keccak256(int thr_id, uint32_t *pdata,
+	const uint32_t *ptarget, uint32_t max_nonce,
+	unsigned long *hashes_done);
+
+extern int scanhash_wh(int thr_id, uint32_t *pdata,
+	const uint32_t *ptarget, uint32_t max_nonce,
+	unsigned long *hashes_done);
+
+extern int scanhash_doom(int thr_id, uint32_t *pdata,
+	const uint32_t *ptarget, uint32_t max_nonce,
+	unsigned long *hashes_done);
+
 extern void fugue256_hash(unsigned char* output, const unsigned char* input, int len);
 extern void heavycoin_hash(unsigned char* output, const unsigned char* input, int len);
 extern void groestlcoin_hash(unsigned char* output, const unsigned char* input, int len);
@@ -258,6 +368,7 @@ struct work_restart {
 	char			padding[128 - sizeof(unsigned long)];
 };
 
+
 extern bool opt_debug;
 extern bool opt_protocol;
 extern int opt_timeout;
@@ -277,11 +388,23 @@ extern struct work_restart *work_restart;
 extern bool opt_trust_pool;
 extern uint16_t opt_vote;
 
+/* JSON-RPC option bits (presumably the `flags` argument of json_rpc_call2 -- confirm against util.c). */
+#define JSON_RPC_LONGPOLL	(1 << 0)
+#define JSON_RPC_QUIET_404	(1 << 1)
+extern bool opt_redirect;
+extern bool have_gbt;
+extern bool allow_getwork;
+
 extern void applog(int prio, const char *fmt, ...);
 extern json_t *json_rpc_call(CURL *curl, const char *url, const char *userpass,
 	const char *rpc_req, bool, bool, int *);
+extern json_t *json_rpc_call2(CURL *curl, const char *url, const char *userpass,
+	const char *rpc_req, int *curl_err, int flags);
 extern char *bin2hex(const unsigned char *p, size_t len);
+extern void abin2hex(char *s, const unsigned char *p, size_t len);
 extern bool hex2bin(unsigned char *p, const char *hexstr, size_t len);
+extern int varint_encode(unsigned char *p, uint64_t n);
+extern size_t address_to_script(unsigned char *out, size_t outsz, const char *addr);
 extern int timeval_subtract(struct timeval *result, struct timeval *x,
 	struct timeval *y);
 extern bool fulltest(const uint32_t *hash, const uint32_t *target);
@@ -301,6 +424,13 @@ struct stratum_job {
 	bool clean;
 	unsigned char nreward[2];
 	double diff;
+
+	unsigned char m7prevblock[32];
+	unsigned char m7accroot[32];
+	unsigned char m7merkleroot[32];
+	unsigned char m7height[8];
+	unsigned char m7ntime[8];
+	unsigned char m7version[2];
 };
 
 struct stratum_ctx {
@@ -332,6 +462,7 @@ void stratum_disconnect(struct stratum_ctx *sctx);
 bool stratum_subscribe(struct stratum_ctx *sctx);
 bool stratum_authorize(struct stratum_ctx *sctx, const char *user, const char *pass);
 bool stratum_handle_method(struct stratum_ctx *sctx, const char *s);
+bool stratum_handle_method_m7(struct stratum_ctx *sctx, const char *s);
 
 struct thread_q;
 
diff --git a/pluck/cuda_pluck.cu b/pluck/cuda_pluck.cu
new file mode 100644
index 0000000000..d17248e93d
--- /dev/null
+++ b/pluck/cuda_pluck.cu
@@ -0,0 +1,632 @@
+/*
+ * "pluck" kernel implementation.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2015  djm34
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author   djm34
+ */
+#include <cuda.h>
+#include "cuda_runtime.h"
+#include "device_launch_parameters.h"
+
+
+#include <stdio.h>
+#include <stdint.h>
+#include <memory.h>
+ 
+
+
+extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
+extern int compute_version[8];
+
+__device__  uint8_t *  hashbuffer;
+uint32_t *d_PlNonce[8];
+__constant__  uint32_t pTarget[8];
+__constant__  uint32_t  c_data[20];
+#include "cuda_vector.h" 
+
+
+#define HASH_MEMORY_8bit 131072
+#define HASH_MEMORY_32bit 32768
+#define HASH_MEMORY 4096
+
+static __constant__  uint32_t H256[8] = {
+	0x6A09E667, 0xBB67AE85, 0x3C6EF372,
+	0xA54FF53A, 0x510E527F, 0x9B05688C,
+	0x1F83D9AB, 0x5BE0CD19
+};
+
+static  __constant__  uint32_t Ksha[64] = {
+	0x428A2F98, 0x71374491,
+	0xB5C0FBCF, 0xE9B5DBA5,
+	0x3956C25B, 0x59F111F1,
+	0x923F82A4, 0xAB1C5ED5,
+	0xD807AA98, 0x12835B01,
+	0x243185BE, 0x550C7DC3,
+	0x72BE5D74, 0x80DEB1FE,
+	0x9BDC06A7, 0xC19BF174,
+	0xE49B69C1, 0xEFBE4786,
+	0x0FC19DC6, 0x240CA1CC,
+	0x2DE92C6F, 0x4A7484AA,
+	0x5CB0A9DC, 0x76F988DA,
+	0x983E5152, 0xA831C66D,
+	0xB00327C8, 0xBF597FC7,
+	0xC6E00BF3, 0xD5A79147,
+	0x06CA6351, 0x14292967,
+	0x27B70A85, 0x2E1B2138,
+	0x4D2C6DFC, 0x53380D13,
+	0x650A7354, 0x766A0ABB,
+	0x81C2C92E, 0x92722C85,
+	0xA2BFE8A1, 0xA81A664B,
+	0xC24B8B70, 0xC76C51A3,
+	0xD192E819, 0xD6990624,
+	0xF40E3585, 0x106AA070,
+	0x19A4C116, 0x1E376C08,
+	0x2748774C, 0x34B0BCB5,
+	0x391C0CB3, 0x4ED8AA4A,
+	0x5B9CCA4F, 0x682E6FF3,
+	0x748F82EE, 0x78A5636F,
+	0x84C87814, 0x8CC70208,
+	0x90BEFFFA, 0xA4506CEB,
+	0xBEF9A3F7, 0xC67178F2
+};
+
+
+#define SALSA(a,b,c,d) { /* Salsa20-style quarter-round on (a,b,c,d); needs a scratch variable `t` in the caller's scope; `rotate` comes from the project headers (presumably a left rotate -- confirm) */ \
+    t =a+d; b^=rotate(t,  7);    \
+    t =b+a; c^=rotate(t,  9);    \
+    t =c+b; d^=rotate(t, 13);    \
+    t =d+c; a^=rotate(t, 18);     \
+}
+
+
+#define SALSA_CORE(state) { /* one double round over the 16 words state.s0..state.sf: first four SALSA steps on the columns (0,4,8,c ...), then four on the rows (0,1,2,3 ...) */ \
+\
+SALSA(state.s0,state.s4,state.s8,state.sc); \
+SALSA(state.s5,state.s9,state.sd,state.s1); \
+SALSA(state.sa,state.se,state.s2,state.s6); \
+SALSA(state.sf,state.s3,state.s7,state.sb); \
+SALSA(state.s0,state.s1,state.s2,state.s3); \
+SALSA(state.s5,state.s6,state.s7,state.s4); \
+SALSA(state.sa,state.sb,state.s8,state.s9); \
+SALSA(state.sf,state.sc,state.sd,state.se); \
+	} 
+
+
+static __device__ __forceinline__ uint16 xor_salsa8(const uint16 &Bx)
+{
+	// Four SALSA_CORE passes over a working copy of Bx, then the input is added back in.
+	uint32_t t; // scratch word referenced inside the SALSA macro
+	uint16 mixed = Bx;
+#pragma unroll
+	for (int pass = 0; pass < 4; ++pass)
+		SALSA_CORE(mixed);
+	return mixed + Bx;
+}
+
+
+
+// sha256
+
+static __device__ __forceinline__ uint32_t bsg2_0(const uint32_t x)
+{
+	// Rotations by 2, 13 and 22 combined through xor3b (the shape of SHA-256's "big sigma 0").
+	return xor3b(
+		SPH_ROTR32(x, 2),
+		SPH_ROTR32(x, 13),
+		SPH_ROTR32(x, 22));
+}
+
+static __device__ __forceinline__ uint32_t bsg2_1(const uint32_t x)
+{
+	// Rotations by 6, 11 and 25 combined through xor3b (the shape of SHA-256's "big sigma 1").
+	return xor3b(
+		SPH_ROTR32(x, 6),
+		SPH_ROTR32(x, 11),
+		SPH_ROTR32(x, 25));
+}
+
+static __device__ __forceinline__ uint32_t ssg2_0(const uint32_t x)
+{
+	// Rotations by 7 and 18 plus a shift by 3, combined through xor3b; every term is a 32-bit quantity, so 32-bit temporaries are used as in bsg2_*.
+	const uint32_t r1 = SPH_ROTR32(x, 7), r2 = SPH_ROTR32(x, 18);
+	const uint32_t r3 = shr_t32(x, 3);
+	return xor3b(r1, r2, r3);
+}
+
+static __device__ __forceinline__ uint32_t ssg2_1(const uint32_t x)
+{
+	// Rotations by 17 and 19 plus a shift by 10, combined through xor3b; every term is a 32-bit quantity, so 32-bit temporaries are used as in bsg2_*.
+	const uint32_t r1 = SPH_ROTR32(x, 17), r2 = SPH_ROTR32(x, 19);
+	const uint32_t r3 = shr_t32(x, 10);
+	return xor3b(r1, r2, r3);
+}
+
+static __device__ __forceinline__ void sha2_step1(const uint32_t a, const uint32_t b, const uint32_t c, uint32_t &d, const uint32_t e,
+const uint32_t f, const uint32_t g, uint32_t &h, const uint32_t in, const uint32_t Kshared)
+{
+	// One compression step with the message word passed in directly:
+	//   T1 = h + bsg2_1(e) + xandx(e,f,g) + Kshared + in
+	//   T2 = bsg2_0(a) + andor32(a,b,c)
+	// d accumulates T1 and h is replaced by T1 + T2; callers rotate the
+	// argument order between steps instead of moving values between variables.
+	const uint32_t T1 = h + bsg2_1(e) + xandx(e, f, g) + Kshared + in;
+	const uint32_t T2 = bsg2_0(a) + andor32(a, b, c);
+
+	d += T1;
+	h = T1 + T2;
+}
+
+static __device__ __forceinline__ void sha2_step2(const uint32_t a, const uint32_t b, const uint32_t c, uint32_t &d, const uint32_t e,
+const uint32_t f, const uint32_t g, uint32_t &h, uint32_t* in, const uint32_t pc, const uint32_t Kshared)
+{
+	// Compression step that first expands the message schedule in place.
+	// `in` is used as a 16-word ring: slot pc holds the word from 16 steps ago
+	// and the other taps are addressed modulo 16.
+	// Callers pass pc = 0..15 (see sha2_round_body).
+
+	// (pc - k) is unsigned arithmetic; the & 0xF mask folds the wrap back into 0..15.
+	const uint32_t back16 = in[pc];
+	const uint32_t back2 = in[(pc - 2) & 0xF];
+	const uint32_t back7 = in[(pc - 7) & 0xF];
+	const uint32_t back15 = in[(pc - 15) & 0xF];
+
+	// New schedule word, stored over the slot it was derived from:
+	//   W = ssg2_1(W[-2]) + W[-7] + ssg2_0(W[-15]) + W[-16]
+	const uint32_t expanded = ssg2_1(back2) + back7 + ssg2_0(back15) + back16;
+	in[pc] = expanded;
+
+	// Same update as sha2_step1, fed with the freshly expanded word:
+	//   T1 = h + bsg2_1(e) + xandx(e,f,g) + Kshared + W
+	//   T2 = bsg2_0(a) + andor32(a,b,c)
+	const uint32_t T1 = h + bsg2_1(e) + xandx(e, f, g) + Kshared + expanded;
+	const uint32_t T2 = bsg2_0(a) + andor32(a, b, c);
+
+	// d accumulates T1; h becomes T1 + T2.
+	d += T1;
+	h = T1 + T2;
+}
+
+
+static __device__ __forceinline__ void sha2_round_body(uint32_t* in, uint32_t* r)
+{
+	uint32_t a = r[0];
+	uint32_t b = r[1];
+	uint32_t c = r[2];
+	uint32_t d = r[3];
+	uint32_t e = r[4];
+	uint32_t f = r[5];
+	uint32_t g = r[6];
+	uint32_t h = r[7];
+	// One 64-step compression: r[0..7] is the chaining state (updated in place at the end), in[0..15] the message block (overwritten by sha2_step2 below).
+	sha2_step1(a, b, c, d, e, f, g, h, in[0], Ksha[0]); // steps 0..15 use in[] as-is; rotating the argument order stands in for shifting a..h
+	sha2_step1(h, a, b, c, d, e, f, g, in[1], Ksha[1]);
+	sha2_step1(g, h, a, b, c, d, e, f, in[2], Ksha[2]);
+	sha2_step1(f, g, h, a, b, c, d, e, in[3], Ksha[3]);
+	sha2_step1(e, f, g, h, a, b, c, d, in[4], Ksha[4]);
+	sha2_step1(d, e, f, g, h, a, b, c, in[5], Ksha[5]);
+	sha2_step1(c, d, e, f, g, h, a, b, in[6], Ksha[6]);
+	sha2_step1(b, c, d, e, f, g, h, a, in[7], Ksha[7]);
+	sha2_step1(a, b, c, d, e, f, g, h, in[8], Ksha[8]);
+	sha2_step1(h, a, b, c, d, e, f, g, in[9], Ksha[9]);
+	sha2_step1(g, h, a, b, c, d, e, f, in[10], Ksha[10]);
+	sha2_step1(f, g, h, a, b, c, d, e, in[11], Ksha[11]);
+	sha2_step1(e, f, g, h, a, b, c, d, in[12], Ksha[12]);
+	sha2_step1(d, e, f, g, h, a, b, c, in[13], Ksha[13]);
+	sha2_step1(c, d, e, f, g, h, a, b, in[14], Ksha[14]);
+	sha2_step1(b, c, d, e, f, g, h, a, in[15], Ksha[15]);
+	// Steps 16..63: three passes of sixteen steps, using Ksha[16 + 16 * i] onward.
+#pragma unroll 3
+	for (int i = 0; i<3; i++) {
+		// Each sha2_step2 replaces in[pc] with the next schedule word before using it.
+		sha2_step2(a, b, c, d, e, f, g, h, in, 0, Ksha[16 + 16 * i]);
+		sha2_step2(h, a, b, c, d, e, f, g, in, 1, Ksha[17 + 16 * i]);
+		sha2_step2(g, h, a, b, c, d, e, f, in, 2, Ksha[18 + 16 * i]);
+		sha2_step2(f, g, h, a, b, c, d, e, in, 3, Ksha[19 + 16 * i]);
+		sha2_step2(e, f, g, h, a, b, c, d, in, 4, Ksha[20 + 16 * i]);
+		sha2_step2(d, e, f, g, h, a, b, c, in, 5, Ksha[21 + 16 * i]);
+		sha2_step2(c, d, e, f, g, h, a, b, in, 6, Ksha[22 + 16 * i]);
+		sha2_step2(b, c, d, e, f, g, h, a, in, 7, Ksha[23 + 16 * i]);
+		sha2_step2(a, b, c, d, e, f, g, h, in, 8, Ksha[24 + 16 * i]);
+		sha2_step2(h, a, b, c, d, e, f, g, in, 9, Ksha[25 + 16 * i]);
+		sha2_step2(g, h, a, b, c, d, e, f, in, 10, Ksha[26 + 16 * i]);
+		sha2_step2(f, g, h, a, b, c, d, e, in, 11, Ksha[27 + 16 * i]);
+		sha2_step2(e, f, g, h, a, b, c, d, in, 12, Ksha[28 + 16 * i]);
+		sha2_step2(d, e, f, g, h, a, b, c, in, 13, Ksha[29 + 16 * i]);
+		sha2_step2(c, d, e, f, g, h, a, b, in, 14, Ksha[30 + 16 * i]);
+		sha2_step2(b, c, d, e, f, g, h, a, in, 15, Ksha[31 + 16 * i]);
+
+	}
+
+	// Add the working variables back into the chaining state.
+
+	r[0] += a;
+	r[1] += b;
+	r[2] += c;
+	r[3] += d;
+	r[4] += e;
+	r[5] += f;
+	r[6] += g;
+	r[7] += h;
+}
+
+
+static __device__ __forceinline__ uint8 sha256_64(uint32_t *data)
+{
+	// SHA-256 of exactly 64 bytes: one data block followed by one padding block.
+	uint32_t __align__(64) block[16];
+	uint32_t __align__(32) state[8];
+
+	// Start from H256 and load the 16 message words through swapvec
+	// (presumably a per-word byte swap -- it is defined in the project headers).
+	*(uint8 *)state = *(uint8 *)H256;
+	*(uint16 *)block = swapvec((uint16 *)data);
+	sha2_round_body(block, state);
+
+	// Padding block: 0x80 marker bit first, zero fill, bit length 0x200 (= 512) last.
+	block[0] = 0x80000000;
+#pragma unroll 14
+	for (int w = 1; w <= 14; w++) { block[w] = 0; }
+	block[15] = 0x200;
+
+	sha2_round_body(block, state);
+	return swapvec((uint8 *)state);
+}
+
+
+static __device__ __forceinline__ uint8 sha256_80(uint32_t nonce)
+{
+	// SHA-256 of the 80-byte header held in c_data, with `nonce` substituted
+	// for the last header word (c_data[19] itself is never read).
+	uint32_t __align__(64) block[16];
+	uint32_t __align__(32) state[8];
+
+	// First block: header words 0..15.
+	*(uint8 *)state = *(uint8 *)H256;
+	*(uint16 *)block = swapvec((uint16 *)c_data);
+	sha2_round_body(block, state);
+
+	// Second block: header words 16..18, the nonce, then padding.
+#pragma unroll 3
+	for (int w = 0; w < 3; w++) { block[w] = cuda_swab32(c_data[16 + w]); }
+	block[3] = nonce; // inserted as-is, without the cuda_swab32 applied to the other words
+	block[4] = 0x80000000;
+
+#pragma unroll 10
+	for (int w = 5; w < 15; w++) { block[w] = 0; }
+	block[15] = 0x280; // message length in bits (80 * 8 = 640)
+
+	// Finish the compression and return the digest through swapvec.
+	sha2_round_body(block, state);
+	return swapvec((uint8 *)state);
+}
+
+
+#define SHIFT (32 * 1024 * 4) /* per-thread scratch size in bytes (128 KiB); parenthesized so the macro is safe inside larger expressions */
+__global__ __launch_bounds__(256, 1) void pluck_gpu_hash0_v50(int threads, uint32_t startNonce)
+{
+	// Stage 1 of pluck (launched when compute_version == 50): hashes the header for nonce startNonce + thread into entry 0 of the thread's scratch area, zeroes entry 1 and runs rounds 2..4.
+	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	if (thread < threads)
+	{
+		const uint32_t nonce = startNonce + thread;
+		// Byte offset of this thread's scratch area. Computed in size_t: as an int, SHIFT * thread overflows from thread 16384 upward.
+		size_t shift = (size_t)SHIFT * thread;
+		((uint8*)(hashbuffer + shift))[0] = sha256_80(nonce);
+		((uint8*)(hashbuffer + shift))[1] = make_uint8(0, 0, 0, 0, 0, 0, 0, 0);
+		for (int i = 2; i < 5; i++)
+		{
+			uint32_t randmax = i * 32 - 4;
+			uint32_t randseed[16];
+			uint32_t randbuffer[16];
+			uint32_t joint[16];
+
+			// Seed the mixer with the two 32-byte entries preceding entry i.
+			((uint8*)randseed)[0] = __ldg8(&(hashbuffer + shift)[32 * i - 64]);
+			((uint8*)randseed)[1] = __ldg8(&(hashbuffer + shift)[32 * i - 32]);
+
+			// NOTE(review): __ldg() (and presumably the __ldg8/__ldgtoint_unaligned helpers) read a buffer this kernel also
+			// writes; CUDA documents __ldg() for data that stays read-only during the kernel -- confirm these reads cannot go stale.
+			((uint16*)randbuffer)[0] = xor_salsa8(((uint16*)randseed)[0]);
+
+			// joint[0..7] = previous entry (already loaded as the second half of randseed); joint[8..15] = words gathered below.
+			((uint8*)joint)[0] = ((uint8*)randseed)[1];
+#pragma unroll
+			for (int j = 0; j < 8; j++)
+			{
+				uint32_t rand = randbuffer[j] % (randmax - 32);
+				joint[j + 8] = __ldgtoint_unaligned(&(hashbuffer + shift)[rand]);
+			}
+
+			uint8 truc = sha256_64(joint);
+			((uint8*)(hashbuffer + shift))[i] = truc;
+			((uint8*)randseed)[0] = ((uint8*)joint)[0];
+			((uint8*)randseed)[1] = truc;
+
+			// Second mix: seeded with the previous entry and the entry just written.
+			((uint16*)randbuffer)[0] = xor_salsa8(((uint16*)randseed)[0]);
+
+			// Scatter: 16 times, copy the 4 bytes at offset randmax + j to an offset below randmax.
+			for (int j = 0; j < 32; j += 2)
+			{
+				uint32_t rand = randbuffer[j / 2] % randmax;
+
+				(hashbuffer + shift)[rand] = __ldg(&(hashbuffer + shift)[randmax + j]);
+				(hashbuffer + shift)[rand + 1] = __ldg(&(hashbuffer + shift)[randmax + j + 1]);
+				(hashbuffer + shift)[rand + 2] = __ldg(&(hashbuffer + shift)[randmax + j + 2]);
+				(hashbuffer + shift)[rand + 3] = __ldg(&(hashbuffer + shift)[randmax + j + 3]);
+			}
+
+		} // main loop
+
+	}
+}
+__global__ __launch_bounds__(256, 1) void pluck_gpu_hash_v50(int threads, uint32_t startNonce, uint32_t *nonceVector)
+{
+	// Stage 2 of pluck (compute_version == 50 path): runs rounds 5 .. HASH_MEMORY - 2 on the scratch area filled by pluck_gpu_hash0_v50, then stores the nonce in nonceVector[0] if its result word is <= pTarget[7].
+	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	if (thread < threads)
+	{
+		const uint32_t nonce = startNonce + thread;
+		// Byte offset of this thread's scratch area. Computed in size_t: as an int, SHIFT * thread overflows from thread 16384 upward.
+		size_t shift = (size_t)SHIFT * thread;
+
+		for (int i = 5; i < HASH_MEMORY - 1; i++)
+		{
+			uint32_t randmax = i * 32 - 4;
+			uint32_t randseed[16];
+			uint32_t randbuffer[16];
+			uint32_t joint[16];
+			uint8 Buffbuffer[2];
+
+			((uint8*)randseed)[0] = __ldg8(&(hashbuffer + shift)[32 * i - 64]);
+			((uint8*)randseed)[1] = __ldg8(&(hashbuffer + shift)[32 * i - 32]);
+
+			// Unlike rounds 2..4, the seed is XORed with the 64 bytes that start 128 bytes before entry i.
+			Buffbuffer[0] = __ldg8(&(hashbuffer + shift)[32 * i - 128]);
+			Buffbuffer[1] = __ldg8(&(hashbuffer + shift)[32 * i - 96]);
+			((uint16*)randseed)[0] ^= ((uint16*)Buffbuffer)[0];
+
+			((uint16*)randbuffer)[0] = xor_salsa8(((uint16*)randseed)[0]);
+
+			((uint8*)joint)[0] = __ldg8(&(hashbuffer + shift)[(i - 1) << 5]);
+
+#pragma unroll
+			for (int j = 0; j < 8; j++)
+			{
+				uint32_t rand = randbuffer[j] % (randmax - 32);
+				joint[j + 8] = __ldgtoint_unaligned(&(hashbuffer + shift)[rand]);
+			}
+
+			uint8 truc = sha256_64(joint);
+			((uint8*)(hashbuffer + shift))[i] = truc;
+			((uint8*)randseed)[0] = ((uint8*)joint)[0];
+			((uint8*)randseed)[1] = truc;
+
+			// Second mix: previous entry + new entry, XORed with the same Buffbuffer words loaded above.
+			((uint16*)randseed)[0] ^= ((uint16*)Buffbuffer)[0];
+
+			// NOTE(review): __ldg() (and presumably the __ldg8/__ldgtoint helpers) read a buffer this kernel also writes; CUDA documents __ldg() for data that stays read-only during the kernel -- confirm.
+			((uint16*)randbuffer)[0] = xor_salsa8(((uint16*)randseed)[0]);
+
+			// Scatter: 16 times, copy the 4 bytes at offset randmax + j to an offset below randmax.
+			for (int j = 0; j < 32; j += 2)
+			{
+				uint32_t rand = randbuffer[j / 2] % randmax;
+
+				(hashbuffer + shift)[rand] = __ldg(&(hashbuffer + shift)[randmax + j]);
+				(hashbuffer + shift)[rand + 1] = __ldg(&(hashbuffer + shift)[randmax + j + 1]);
+				(hashbuffer + shift)[rand + 2] = __ldg(&(hashbuffer + shift)[randmax + j + 2]);
+				(hashbuffer + shift)[rand + 3] = __ldg(&(hashbuffer + shift)[randmax + j + 3]);
+			}
+
+		} // main loop
+
+		// Result word: loaded from byte offset 28 of entry 0. Every thread reports through the same slot nonceVector[0].
+		uint32_t outbuf = __ldgtoint(&(hashbuffer + shift)[28]);
+
+		if (outbuf <= pTarget[7]) {
+			nonceVector[0] = nonce;
+		}
+
+	}
+}
+
+__global__ __launch_bounds__(128, 3) void pluck_gpu_hash0(int threads, uint32_t startNonce)
+{
+	// Stage 1 of pluck (launched when compute_version != 50): hashes the header for nonce startNonce + thread into entry 0 of the thread's scratch area, zeroes entry 1 and runs rounds 2..4.
+	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	if (thread < threads)
+	{
+		const uint32_t nonce = startNonce + thread;
+		// Byte offset of this thread's scratch area. Computed in size_t: as an int, SHIFT * thread overflows from thread 16384 upward.
+		size_t shift = (size_t)SHIFT * thread;
+		((uint8*)(hashbuffer + shift))[0] = sha256_80(nonce);
+		((uint8*)(hashbuffer + shift))[1] = make_uint8(0, 0, 0, 0, 0, 0, 0, 0);
+		for (int i = 2; i < 5; i++)
+		{
+			uint32_t randmax = i * 32 - 4;
+			uint32_t randseed[16];
+			uint32_t randbuffer[16];
+			uint32_t joint[16];
+
+			// Seed the mixer with the two 32-byte entries preceding entry i.
+			((uint8*)randseed)[0] = __ldg8(&(hashbuffer + shift)[32 * i - 64]);
+			((uint8*)randseed)[1] = __ldg8(&(hashbuffer + shift)[32 * i - 32]);
+
+			// NOTE(review): __ldg() (and presumably the __ldg8/__ldgtoint_unaligned helpers) read a buffer this kernel also
+			// writes; CUDA documents __ldg() for data that stays read-only during the kernel -- confirm these reads cannot go stale.
+			((uint16*)randbuffer)[0] = xor_salsa8(((uint16*)randseed)[0]);
+
+			// joint[0..7] = previous entry (already loaded as the second half of randseed); joint[8..15] = words gathered below.
+			((uint8*)joint)[0] = ((uint8*)randseed)[1];
+#pragma unroll
+			for (int j = 0; j < 8; j++)
+			{
+				uint32_t rand = randbuffer[j] % (randmax - 32);
+				joint[j + 8] = __ldgtoint_unaligned(&(hashbuffer + shift)[rand]);
+			}
+
+			uint8 truc = sha256_64(joint);
+			((uint8*)(hashbuffer + shift))[i] = truc;
+			((uint8*)randseed)[0] = ((uint8*)joint)[0];
+			((uint8*)randseed)[1] = truc;
+
+			// Second mix: seeded with the previous entry and the entry just written.
+			((uint16*)randbuffer)[0] = xor_salsa8(((uint16*)randseed)[0]);
+
+			// Scatter: 16 times, copy the 4 bytes at offset randmax + j to an offset below randmax.
+			for (int j = 0; j < 32; j += 2)
+			{
+				uint32_t rand = randbuffer[j / 2] % randmax;
+
+				(hashbuffer + shift)[rand] = __ldg(&(hashbuffer + shift)[randmax + j]);
+				(hashbuffer + shift)[rand + 1] = __ldg(&(hashbuffer + shift)[randmax + j + 1]);
+				(hashbuffer + shift)[rand + 2] = __ldg(&(hashbuffer + shift)[randmax + j + 2]);
+				(hashbuffer + shift)[rand + 3] = __ldg(&(hashbuffer + shift)[randmax + j + 3]);
+			}
+
+		} // main loop
+
+	}
+}
+__global__ __launch_bounds__(128, 3) void pluck_gpu_hash(int threads, uint32_t startNonce, uint32_t *nonceVector)
+{
+	// Stage 2 of pluck (compute_version != 50 path): runs rounds 5 .. HASH_MEMORY - 2 on the scratch area filled by pluck_gpu_hash0, then stores the nonce in nonceVector[0] if its result word is <= pTarget[7].
+	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	if (thread < threads)
+	{
+		const uint32_t nonce = startNonce + thread;
+		// Byte offset of this thread's scratch area. Computed in size_t: as an int, SHIFT * thread overflows from thread 16384 upward.
+		size_t shift = (size_t)SHIFT * thread;
+
+		for (int i = 5; i < HASH_MEMORY - 1; i++)
+		{
+			uint32_t randmax = i * 32 - 4;
+			uint32_t randseed[16];
+			uint32_t randbuffer[16];
+			uint32_t joint[16];
+			uint8 Buffbuffer[2];
+
+			((uint8*)randseed)[0] = __ldg8(&(hashbuffer + shift)[32 * i - 64]);
+			((uint8*)randseed)[1] = __ldg8(&(hashbuffer + shift)[32 * i - 32]);
+
+			// Unlike rounds 2..4, the seed is XORed with the 64 bytes that start 128 bytes before entry i.
+			Buffbuffer[0] = __ldg8(&(hashbuffer + shift)[32 * i - 128]);
+			Buffbuffer[1] = __ldg8(&(hashbuffer + shift)[32 * i - 96]);
+			((uint16*)randseed)[0] ^= ((uint16*)Buffbuffer)[0];
+
+			((uint16*)randbuffer)[0] = xor_salsa8(((uint16*)randseed)[0]);
+
+			((uint8*)joint)[0] = __ldg8(&(hashbuffer + shift)[(i - 1) << 5]);
+
+#pragma unroll
+			for (int j = 0; j < 8; j++)
+			{
+				uint32_t rand = randbuffer[j] % (randmax - 32);
+				joint[j + 8] = __ldgtoint_unaligned(&(hashbuffer + shift)[rand]);
+			}
+
+			uint8 truc = sha256_64(joint);
+			((uint8*)(hashbuffer + shift))[i] = truc;
+			((uint8*)randseed)[0] = ((uint8*)joint)[0];
+			((uint8*)randseed)[1] = truc;
+
+			// Second mix: previous entry + new entry, XORed with the same Buffbuffer words loaded above.
+			((uint16*)randseed)[0] ^= ((uint16*)Buffbuffer)[0];
+
+			// NOTE(review): __ldg() (and presumably the __ldg8/__ldgtoint helpers) read a buffer this kernel also writes; CUDA documents __ldg() for data that stays read-only during the kernel -- confirm.
+			((uint16*)randbuffer)[0] = xor_salsa8(((uint16*)randseed)[0]);
+
+			// Scatter: 16 times, copy the 4 bytes at offset randmax + j to an offset below randmax.
+			for (int j = 0; j < 32; j += 2)
+			{
+				uint32_t rand = randbuffer[j / 2] % randmax;
+
+				(hashbuffer + shift)[rand] = __ldg(&(hashbuffer + shift)[randmax + j]);
+				(hashbuffer + shift)[rand + 1] = __ldg(&(hashbuffer + shift)[randmax + j + 1]);
+				(hashbuffer + shift)[rand + 2] = __ldg(&(hashbuffer + shift)[randmax + j + 2]);
+				(hashbuffer + shift)[rand + 3] = __ldg(&(hashbuffer + shift)[randmax + j + 3]);
+			}
+
+		} // main loop
+
+		// Result word: loaded from byte offset 28 of entry 0. Every thread reports through the same slot nonceVector[0].
+		uint32_t outbuf = __ldgtoint(&(hashbuffer + shift)[28]);
+
+		if (outbuf <= pTarget[7]) {
+			nonceVector[0] = nonce;
+		}
+
+	}
+}
+
+
+void pluck_cpu_init(int thr_id, int threads, uint32_t* hash)
+{
+	// Stores the scratch pointer `hash` in the device symbol `hashbuffer` and allocates the one-word result slot d_PlNonce[thr_id]; `threads` is unused.
+	cudaMemcpyToSymbol(hashbuffer, &hash, sizeof(hash), 0, cudaMemcpyHostToDevice); // copies the pointer value itself, hence sizeof(hash)
+	cudaMalloc(&d_PlNonce[thr_id], sizeof(uint32_t)); 
+	// NOTE(review): neither CUDA return code is checked here.
+} 
+
+
+__host__ uint32_t pluck_cpu_hash(int thr_id, int threads, uint32_t startNounce,  int order)
+{
+	// Runs both pluck stages for `threads` nonces starting at startNounce and returns the nonce a kernel
+	// stored in d_PlNonce[thr_id], or 0xffffffff when none was stored. The sentinel lives in a scalar:
+	// an array initializer `{0xffffffff}` sets only element 0, leaving the slot for thr_id > 0 at 0.
+	uint32_t foundNonce = 0xffffffff;
+	cudaMemset(d_PlNonce[thr_id], 0xff, sizeof(uint32_t)); // byte-wise fill -> 0xffffffff
+
+	const int threadsperblock = 128;
+	dim3 grid((threads + threadsperblock - 1) / threadsperblock);
+	dim3 block(threadsperblock);
+	dim3 grid50((threads + 256 - 1) / 256);
+	dim3 block50(256);
+
+	if (compute_version[thr_id]==50) {
+		pluck_gpu_hash0_v50 << <grid50, block50 >> >(threads, startNounce);
+		pluck_gpu_hash_v50  << <grid50, block50 >> >(threads, startNounce, d_PlNonce[thr_id]);
+	}
+	else {
+		pluck_gpu_hash0 << <grid, block >> >(threads, startNounce);
+		pluck_gpu_hash << <grid, block >> >(threads, startNounce, d_PlNonce[thr_id]);
+	}
+
+	MyStreamSynchronize(NULL, order, thr_id);
+	if (cudaMemcpy(&foundNonce, d_PlNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost) != cudaSuccess)
+		foundNonce = 0xffffffff; // treat a failed copy-back as "no nonce found"
+	return foundNonce;
+}
+
+
+
+__host__ void pluck_setBlockTarget(const void *pdata, const void *ptarget)
+{
+	uint32_t header[20]; // local staging copy of the 80-byte block header
+	memcpy(header, pdata, sizeof(header));
+	cudaMemcpyToSymbol(c_data, header, sizeof(header), 0, cudaMemcpyHostToDevice); // header -> c_data
+	cudaMemcpyToSymbol(pTarget, ptarget, sizeof(uint32_t) * 8, 0, cudaMemcpyHostToDevice); // 8-word target -> pTarget
+}
\ No newline at end of file
diff --git a/pluck/pluck.cu b/pluck/pluck.cu
new file mode 100644
index 0000000000..a9a7d07bc9
--- /dev/null
+++ b/pluck/pluck.cu
@@ -0,0 +1,288 @@
+
+extern "C"
+{
+//#include "sph/neoscrypt.h"
+#include "miner.h"
+}
+
+#include <stdint.h>
+
+// from cpu-miner.c
+extern int device_map[8];
+
+// Memory for the input/output of the chained hash functions
+
+static uint32_t *d_hash[8] ;
+ 
+
+extern void pluck_setBlockTarget(const void* data, const void *ptarget);
+extern void pluck_cpu_init(int thr_id, int threads, uint32_t *d_outputHash);
+extern uint32_t pluck_cpu_hash(int thr_id, int threads, uint32_t startNounce, int order);
+  
+
+extern float tp_coef[8];
+extern bool opt_benchmark;
+
+#define ROTL(a, b) (((a) << (b)) | ((a) >> (32 - (b))))
+//note, this is 64 bytes
+static inline void xor_salsa8(uint32_t B[16], const uint32_t Bx[16])
+{
+#define ROTL(a, b) (((a) << (b)) | ((a) >> (32 - (b))))
+	uint32_t x00, x01, x02, x03, x04, x05, x06, x07, x08, x09, x10, x11, x12, x13, x14, x15;
+	int i;
+	// Host counterpart of the mixer: B ^= Bx, then four double rounds (i = 0, 2, 4, 6) on the copy x00..x15, then B += copy.
+	x00 = (B[0] ^= Bx[0]);
+	x01 = (B[1] ^= Bx[1]);
+	x02 = (B[2] ^= Bx[2]);
+	x03 = (B[3] ^= Bx[3]);
+	x04 = (B[4] ^= Bx[4]);
+	x05 = (B[5] ^= Bx[5]);
+	x06 = (B[6] ^= Bx[6]);
+	x07 = (B[7] ^= Bx[7]);
+	x08 = (B[8] ^= Bx[8]);
+	x09 = (B[9] ^= Bx[9]);
+	x10 = (B[10] ^= Bx[10]);
+	x11 = (B[11] ^= Bx[11]);
+	x12 = (B[12] ^= Bx[12]);
+	x13 = (B[13] ^= Bx[13]);
+	x14 = (B[14] ^= Bx[14]);
+	x15 = (B[15] ^= Bx[15]);
+	for (i = 0; i < 8; i += 2) {
+		/* Operate on columns. */
+		x04 ^= ROTL(x00 + x12, 7);  x09 ^= ROTL(x05 + x01, 7);
+		x14 ^= ROTL(x10 + x06, 7);  x03 ^= ROTL(x15 + x11, 7);
+
+		x08 ^= ROTL(x04 + x00, 9);  x13 ^= ROTL(x09 + x05, 9);
+		x02 ^= ROTL(x14 + x10, 9);  x07 ^= ROTL(x03 + x15, 9);
+
+		x12 ^= ROTL(x08 + x04, 13);  x01 ^= ROTL(x13 + x09, 13);
+		x06 ^= ROTL(x02 + x14, 13);  x11 ^= ROTL(x07 + x03, 13);
+
+		x00 ^= ROTL(x12 + x08, 18);  x05 ^= ROTL(x01 + x13, 18);
+		x10 ^= ROTL(x06 + x02, 18);  x15 ^= ROTL(x11 + x07, 18);
+
+		/* Operate on rows. */
+		x01 ^= ROTL(x00 + x03, 7);  x06 ^= ROTL(x05 + x04, 7);
+		x11 ^= ROTL(x10 + x09, 7);  x12 ^= ROTL(x15 + x14, 7);
+
+		x02 ^= ROTL(x01 + x00, 9);  x07 ^= ROTL(x06 + x05, 9);
+		x08 ^= ROTL(x11 + x10, 9);  x13 ^= ROTL(x12 + x15, 9);
+
+		x03 ^= ROTL(x02 + x01, 13);  x04 ^= ROTL(x07 + x06, 13);
+		x09 ^= ROTL(x08 + x11, 13);  x14 ^= ROTL(x13 + x12, 13);
+
+		x00 ^= ROTL(x03 + x02, 18);  x05 ^= ROTL(x04 + x07, 18);
+		x10 ^= ROTL(x09 + x08, 18);  x15 ^= ROTL(x14 + x13, 18);
+	}
+	B[0] += x00; // feed-forward: the mixed copy is added onto the XORed input, result left in B
+	B[1] += x01;
+	B[2] += x02;
+	B[3] += x03;
+	B[4] += x04;
+	B[5] += x05;
+	B[6] += x06;
+	B[7] += x07;
+	B[8] += x08;
+	B[9] += x09;
+	B[10] += x10;
+	B[11] += x11;
+	B[12] += x12;
+	B[13] += x13;
+	B[14] += x14;
+	B[15] += x15;
+#undef ROTL // NOTE: this also removes the identical file-level ROTL defined just before this function
+}
+
+void sha256_hash(unsigned char *hash, const unsigned char *data, int len)
+{
+	uint32_t S[16], T[16];
+	int i, r;
+	// One-shot SHA-256 of data[0..len): writes eight big-endian words (32 bytes) to hash. r counts the message bytes not yet consumed.
+	sha256_init(S);
+	for (r = len; r > -9; r -= 64) { // r > -9 admits one extra pass when the last data block (56..63 bytes left) had no room for the length word
+		if (r < 64)
+			memset(T, 0, 64);
+		memcpy(T, data + len - r, r > 64 ? 64 : (r < 0 ? 0 : r));
+		if (r >= 0 && r < 64)
+			((unsigned char *)T)[r] = 0x80; // padding marker directly after the last message byte
+		for (i = 0; i < 16; i++) 
+			T[i] = be32dec(T + i);
+		// r < 56 holds only for the final block: the message bit length goes into its last word.
+		if (r < 56)
+			T[15] = 8 * len;
+		sha256_transform(S, T, 0); // NOTE(review): meaning of the third argument is defined elsewhere (presumably a byte-swap flag) -- confirm
+	}
+	for (i = 0; i < 8; i++)
+		be32enc((uint32_t *)hash + i, S[i]);
+}
+
+void sha256_hash512(unsigned char *hash, const unsigned char *data)
+{
+	/* SHA-256 of exactly 64 bytes: one message block plus one fixed padding block. */
+	uint32_t state[16], block[16];
+	int w;
+
+	sha256_init(state);
+
+	/* Block 1: the 64 input bytes, read as 16 big-endian words. */
+	for (w = 0; w < 16; w++)
+		block[w] = be32dec(data + 4 * w);
+	sha256_transform(state, block, 0);
+
+	/* Block 2: padding only -- 0x80 marker byte, zeros, bit length (512) in the last word. */
+	memset(block, 0, sizeof(block));
+	block[0] = 0x80000000;
+	block[15] = 8 * 64;
+	sha256_transform(state, block, 0);
+
+	/* Emit the eight state words big-endian. */
+	for (w = 0; w < 8; w++)
+		be32enc(hash + 4 * w, state[w]);
+}
+
+inline void pluck(uint32_t *hash, uint32_t *input)
+{
+	// CPU reference for the pluck hash: consumes 20 words (80 bytes) from
+	// `input` and stores the first 32 bytes of the mixed scratch area in `hash`.
+	const int HASH_MEMORY = 128 * 1024;
+	uint32_t data[20];
+
+	// The scratch area is heap-allocated: it is far too large for the stack.
+	uint8_t *hashbuffer = (uint8_t*)malloc(HASH_MEMORY);
+	if (hashbuffer == NULL)
+	{
+		// Out of memory: hand back an all-ones hash, which cannot satisfy a
+		// "hash <= target" test, instead of dereferencing NULL.
+		memset(hash, 0xff, 32);
+		return;
+	}
+
+	for (int k = 0; k<20; k++) { data[k] = input[k]; }
+
+	int size = HASH_MEMORY;
+	memset(hashbuffer, 0, 64);
+	sha256_hash(&hashbuffer[0], (uint8_t*)data, 80);
+
+	// Fill the scratch area 32 bytes at a time. Entry i is the SHA-256 of the previous
+	// entry plus eight words gathered from pseudo-random earlier offsets; afterwards
+	// sixteen 4-byte words around the new entry are scattered back over earlier bytes.
+	for (int i = 64; i < size - 32; i += 32)
+	{
+		//i-4 because we use integers for all references against this, and we don't want to go 3 bytes over the defined area
+		int randmax = i - 4; //we could use size here, but then it's probable to use 0 as the value in most cases
+		uint32_t joint[16];
+		uint32_t randbuffer[16];
+		uint32_t randseed[16];
+
+		memcpy(randseed, &hashbuffer[i - 64], 64);
+		if (i>128)
+		{
+			memcpy(randbuffer, &hashbuffer[i - 128], 64);
+		}
+		else
+		{
+			memset(&randbuffer, 0, 64);
+		}
+
+		xor_salsa8(randbuffer, randseed);
+
+		memcpy(joint, &hashbuffer[i - 32], 32);
+		//use the last hash value as the seed
+		for (int j = 32; j < 64; j += 4)
+		{
+			uint32_t rand = randbuffer[(j - 32) / 4] % (randmax - 32); //randmax - 32 as otherwise we go beyond memory that's already been written to
+			// memcpy instead of a pointer cast: `rand` is an arbitrary byte offset, so the word may be unaligned
+			memcpy(&joint[j / 4], &hashbuffer[rand], sizeof(uint32_t));
+		}
+		sha256_hash512(&hashbuffer[i], (uint8_t*)joint);
+
+		memcpy(randseed, &hashbuffer[i - 32], 64); //use last hash value and previous hash value(post-mixing)
+		if (i>128)
+		{
+			memcpy(randbuffer, &hashbuffer[i - 128], 64);
+		}
+		else
+		{
+			memset(randbuffer, 0, 64);
+		}
+		xor_salsa8(randbuffer, randseed);
+
+		for (int j = 0; j < 32; j += 2)
+		{
+			uint32_t rand = randbuffer[j / 2] % randmax;
+			// Copy through a temporary: source and destination may overlap and neither is guaranteed aligned.
+			uint32_t word;
+			memcpy(&word, &hashbuffer[j + i - 4], sizeof(word));
+			memcpy(&hashbuffer[rand], &word, sizeof(word));
+		}
+	}
+
+	// NOTE: an optional final reverse pass (sha256_hash512 over 64-byte steps) used to sit here as
+	// commented-out code marked "off-by-one error is likely here"; it is not part of this hash.
+	memcpy((unsigned char*)hash, hashbuffer, 32);
+
+	free(hashbuffer); // release the scratch buffer on every call
+}
+
+extern "C" int scanhash_pluck(int thr_id, uint32_t *pdata,
+    const uint32_t *ptarget, uint32_t max_nonce,
+    unsigned long *hashes_done)
+{
+	// Scans nonces from pdata[19] towards max_nonce on the GPU mapped to thr_id.
+	// Returns 1 with pdata[19] set to the nonce reported by the GPU, or 0 when the
+	// range is exhausted or a work restart is flagged; *hashes_done is set either way.
+	const uint32_t first_nonce = pdata[19];
+
+	if (opt_benchmark)
+		((uint32_t*)ptarget)[7] = 0x0000ff;
+
+	if (tp_coef[thr_id]<0) { tp_coef[thr_id]=2.45; }
+	const int throughput = (uint32_t)((float)(32*1*64*tp_coef[thr_id]));
+	static bool init[8] = {0,0,0,0,0,0,0,0};
+	if (!init[thr_id])
+	{
+		cudaSetDevice(device_map[thr_id]);
+		cudaDeviceReset();
+		cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
+		cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
+		// Copy constants, allocate memory: 32 * 1024 words (128 KiB) of scratch per GPU thread.
+		// NOTE(review): the cudaMalloc result is not checked; a failed allocation goes unnoticed here.
+		cudaMalloc(&d_hash[thr_id], 32 * 1024 * sizeof(uint32_t) * throughput);
+
+		pluck_cpu_init(thr_id, throughput,d_hash[thr_id]);
+		init[thr_id] = true;
+	}
+
+	uint32_t endiandata[20];
+	for (int k = 0; k < 20; k++)
+		be32enc(&endiandata[k], ((uint32_t*)pdata)[k]);
+
+	pluck_setBlockTarget(endiandata,ptarget);
+
+	do {
+		int order = 0;
+		uint32_t foundNonce = pluck_cpu_hash(thr_id, throughput, pdata[19], order++);
+		if  (foundNonce != 0xffffffff)
+		{
+			// NOTE: the nonce is accepted exactly as reported by the GPU; no CPU re-check
+			// (pluck() followed by a target comparison) is performed here.
+			pdata[19] = foundNonce;
+			*hashes_done = foundNonce - first_nonce + 1;
+			return 1;
+		}
+
+		// Advance in 64 bits: a 32-bit "pdata[19] += throughput" can wrap past
+		// 0xffffffff, which makes the loop condition true again and restarts the
+		// scan from a small nonce instead of ending it.
+		const uint64_t next_nonce = (uint64_t)pdata[19] + (uint32_t)throughput;
+		if (next_nonce > 0xffffffffULL)
+		{
+			pdata[19] = 0xffffffff;
+			break;
+		}
+		pdata[19] = (uint32_t)next_nonce;
+	} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
+
+	*hashes_done = pdata[19] - first_nonce + 1;
+	return 0;
+}
diff --git a/qubit/deep.cu b/qubit/deep.cu
new file mode 100644
index 0000000000..94ff57f11e
--- /dev/null
+++ b/qubit/deep.cu
@@ -0,0 +1,151 @@
+/*
+ * deepcoin algorithm 
+ * 
+ */
+
+extern "C"
+{
+
+#include "sph/sph_luffa.h"
+#include "sph/sph_cubehash.h"
+#include "sph/sph_shavite.h"
+#include "sph/sph_simd.h"
+#include "sph/sph_echo.h"
+
+#include "miner.h"
+}
+
+// aus cpu-miner.c
+extern int device_map[8];
+
+// Device memory for the input/output of the chained hash functions
+static uint32_t *d_hash[8];
+
+
+extern void qubit_luffa512_cpu_init(int thr_id, int threads);
+extern void qubit_luffa512_cpu_setBlock_80(void *pdata);
+extern void qubit_luffa512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
+extern void qubit_luffa512_cpufinal_setBlock_80(void *pdata, const void *ptarget);
+extern uint32_t qubit_luffa512_cpu_finalhash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
+
+extern void x11_cubehash512_cpu_init(int thr_id, int threads);
+extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x11_echo512_cpu_init(int thr_id, int threads);
+extern void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void quark_check_cpu_init(int thr_id, int threads);
+extern void quark_check_cpu_setTarget(const void *ptarget);
+extern uint32_t quark_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
+
+
+// CPU reference for the deep hash chain: luffa512 -> cubehash512 -> echo512.
+// Hashes the 80 bytes at `input` and copies the first 32 bytes of the final
+// 64-byte digest to `state`.
+inline void deephash(void *state, const void *input)
+{
+    // 64-byte working digest, reused as input and output of each stage
+    uint32_t digest[16];
+
+    // stage 1: luffa512 over the 80-byte input
+    sph_luffa512_context luffa;
+    sph_luffa512_init(&luffa);
+    sph_luffa512(&luffa, input, 80);
+    sph_luffa512_close(&luffa, (void*) digest);
+
+    // stage 2: cubehash512 over the 64-byte luffa digest
+    sph_cubehash512_context cubehash;
+    sph_cubehash512_init(&cubehash);
+    sph_cubehash512(&cubehash, (const void*) digest, 64);
+    sph_cubehash512_close(&cubehash, (void*) digest);
+
+    // stage 3: echo512 over the 64-byte cubehash digest
+    sph_echo512_context echo;
+    sph_echo512_init(&echo);
+    sph_echo512(&echo, (const void*) digest, 64);
+    sph_echo512_close(&echo, (void*) digest);
+
+    // only the first 256 bits are handed back to the caller
+    memcpy(state, digest, 32);
+}
+
+
+extern bool opt_benchmark;
+
+/*
+ * Scans nonces on the GPU with the deep chain (luffa512 -> cubehash512 ->
+ * echo512), `throughput` nonces per pass, starting at pdata[19].
+ *
+ * thr_id       index into the per-thread tables (device_map, d_hash, work_restart)
+ * pdata        20-word block header; pdata[19] is the nonce and is updated in place
+ * ptarget      8-word target; word 7 is overwritten with 0x0000ff in benchmark mode
+ * max_nonce    scanning stops once pdata[19] has reached this value
+ * hashes_done  receives the nonce distance covered by this call
+ *
+ * Returns 1 with pdata[19] set to a nonce that also validated on the CPU,
+ * otherwise 0 once the range is exhausted or a work restart is requested.
+ */
+extern "C" int scanhash_deep(int thr_id, uint32_t *pdata,
+    const uint32_t *ptarget, uint32_t max_nonce,
+    unsigned long *hashes_done)
+{
+	const uint32_t first_nonce = pdata[19];
+
+	if (opt_benchmark)
+		((uint32_t*)ptarget)[7] = 0x0000ff;
+
+	const uint32_t Htarg = ptarget[7];
+
+	const int throughput = 256*256*8*8;
+
+	static bool init[8] = {0,0,0,0,0,0,0,0};
+	if (!init[thr_id])
+	{
+		cudaSetDevice(device_map[thr_id]);
+
+		// one-time setup: 16 words (64 bytes) of hash state per nonce of a pass
+		cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
+		qubit_luffa512_cpu_init(thr_id, throughput);
+		x11_cubehash512_cpu_init(thr_id, throughput);
+		x11_echo512_cpu_init(thr_id, throughput);
+
+		quark_check_cpu_init(thr_id, throughput);
+
+		init[thr_id] = true;
+	}
+
+	// header with every word passed through be32enc, as consumed by the hash code
+	uint32_t endiandata[20];
+	for (int k=0; k < 20; k++)
+		be32enc(&endiandata[k], ((uint32_t*)pdata)[k]);
+
+	qubit_luffa512_cpufinal_setBlock_80((void*)endiandata,ptarget);
+	quark_check_cpu_setTarget(ptarget);
+
+	do {
+		int order = 0;
+
+		// first stage: luffa512 of the 80-byte header on the GPU
+		qubit_luffa512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
+
+		// cubehash512 over every hash of the pass (no nonce vector)
+		x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+
+		// echo512 over every hash of the pass (no nonce vector)
+		x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+
+		// scan the results on the GPU for a hash that meets the target
+		uint32_t foundNonce = quark_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		if  (foundNonce != 0xffffffff)
+		{
+			// recompute the candidate on the CPU before reporting it
+			uint32_t vhash64[8];
+			be32enc(&endiandata[19], foundNonce);
+			deephash(vhash64, endiandata);
+
+			if( (vhash64[7]<=Htarg) && fulltest(vhash64, ptarget) ) {
+
+				pdata[19] = foundNonce;
+				*hashes_done = foundNonce - first_nonce + 1;
+				return 1;
+			} else {
+				applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNonce);
+			}
+		}
+
+		// Advance to the next batch. The sum is formed in 64 bits so that a
+		// batch reaching past 0xffffffff ends the scan; a plain 32-bit add
+		// would wrap pdata[19] to a small value that passes the loop test
+		// again and restarts scanning from the low nonces.
+		const uint64_t next_nonce = (uint64_t)pdata[19] + (uint64_t)throughput;
+		if (next_nonce > 0xffffffffULL) {
+			pdata[19] = 0xffffffff;
+			break;
+		}
+		pdata[19] = (uint32_t)next_nonce;
+
+	} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
+
+	*hashes_done = pdata[19] - first_nonce + 1;
+	return 0;
+}
diff --git a/qubit/doom.cu b/qubit/doom.cu
new file mode 100644
index 0000000000..e9557a02f3
--- /dev/null
+++ b/qubit/doom.cu
@@ -0,0 +1,110 @@
+/*
+ * qubit algorithm 
+ * 
+ */
+
+extern "C"
+{
+
+#include "sph/sph_luffa.h"
+
+#include "miner.h"
+}
+
+// aus cpu-miner.c
+extern int device_map[8];
+
+// Device memory for the input/output of the chained hash functions
+static uint32_t *d_hash[8];
+
+
+extern void qubit_luffa512_cpu_init(int thr_id, int threads);
+extern void qubit_luffa512_cpu_setBlock_80(void *pdata);
+extern void qubit_luffa512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
+extern void qubit_luffa512_cpufinal_setBlock_80(void *pdata, const void *ptarget);
+extern uint32_t qubit_luffa512_cpu_finalhash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
+
+extern void quark_check_cpu_init(int thr_id, int threads);
+extern void quark_check_cpu_setTarget(const void *ptarget);
+extern uint32_t quark_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
+
+// CPU reference for the doom hash: a single luffa512 over the 80 bytes at
+// `input`; the first 32 bytes of the 64-byte digest are copied to `state`.
+inline void doomhash(void *state, const void *input)
+{
+    uint32_t digest[16];
+    sph_luffa512_context luffa;
+
+    sph_luffa512_init(&luffa);
+    sph_luffa512(&luffa, input, 80);
+    sph_luffa512_close(&luffa, (void*) digest);
+
+    // only the first 256 bits are handed back to the caller
+    memcpy(state, digest, 32);
+}
+
+
+extern bool opt_benchmark;
+
+/*
+ * Scans nonces on the GPU with a single luffa512 per nonce, `throughput`
+ * nonces per pass, starting at pdata[19]. The target test runs inside the
+ * luffa kernel (qubit_luffa512_cpu_finalhash_80).
+ *
+ * thr_id       index into the per-thread tables (device_map, d_hash, work_restart)
+ * pdata        20-word block header; pdata[19] is the nonce and is updated in place
+ * ptarget      8-word target; word 7 is overwritten with 0x0000ff in benchmark mode
+ * max_nonce    scanning stops once pdata[19] has reached this value
+ * hashes_done  receives the nonce distance covered by this call
+ *
+ * Returns 1 with pdata[19] set to a nonce that also validated on the CPU,
+ * otherwise 0 once the range is exhausted or a work restart is requested.
+ */
+extern "C" int scanhash_doom(int thr_id, uint32_t *pdata,
+    const uint32_t *ptarget, uint32_t max_nonce,
+    unsigned long *hashes_done)
+{
+	const uint32_t first_nonce = pdata[19];
+
+	if (opt_benchmark)
+		((uint32_t*)ptarget)[7] = 0x0000ff;
+
+	const uint32_t Htarg = ptarget[7];
+
+	const int throughput = 256*256*8*8;
+
+	static bool init[8] = {0,0,0,0,0,0,0,0};
+	if (!init[thr_id])
+	{
+		cudaSetDevice(device_map[thr_id]);
+
+		// one-time setup: 16 words (64 bytes) of hash state per nonce of a pass
+		cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
+		qubit_luffa512_cpu_init(thr_id, throughput);
+
+		init[thr_id] = true;
+	}
+
+	// header with every word passed through be32enc, as consumed by the hash code
+	uint32_t endiandata[20];
+	for (int k=0; k < 20; k++)
+		be32enc(&endiandata[k], ((uint32_t*)pdata)[k]);
+
+	qubit_luffa512_cpufinal_setBlock_80((void*)endiandata,ptarget);
+
+	do {
+		int order = 0;
+
+		// hash and target test in one kernel; 0xffffffff means no candidate
+		uint32_t foundNonce = qubit_luffa512_cpu_finalhash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
+		if  (foundNonce != 0xffffffff)
+		{
+			// recompute the candidate on the CPU before reporting it
+			uint32_t vhash64[8];
+			be32enc(&endiandata[19], foundNonce);
+			doomhash(vhash64, endiandata);
+
+			if( (vhash64[7]<=Htarg) && fulltest(vhash64, ptarget) ) {
+
+				pdata[19] = foundNonce;
+				*hashes_done = foundNonce - first_nonce + 1;
+				return 1;
+			} else {
+				applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNonce);
+			}
+		}
+
+		// Advance to the next batch. The sum is formed in 64 bits so that a
+		// batch reaching past 0xffffffff ends the scan; a plain 32-bit add
+		// would wrap pdata[19] to a small value that passes the loop test
+		// again and restarts scanning from the low nonces.
+		const uint64_t next_nonce = (uint64_t)pdata[19] + (uint64_t)throughput;
+		if (next_nonce > 0xffffffffULL) {
+			pdata[19] = 0xffffffff;
+			break;
+		}
+		pdata[19] = (uint32_t)next_nonce;
+
+	} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
+
+	*hashes_done = pdata[19] - first_nonce + 1;
+	return 0;
+}
diff --git a/qubit/qubit.cu b/qubit/qubit.cu
new file mode 100644
index 0000000000..7fe1c485a4
--- /dev/null
+++ b/qubit/qubit.cu
@@ -0,0 +1,180 @@
+/*
+ * qubit algorithm 
+ * 
+ */
+
+extern "C"
+{
+
+#include "sph/sph_luffa.h"
+#include "sph/sph_cubehash.h"
+#include "sph/sph_shavite.h"
+#include "sph/sph_simd.h"
+#include "sph/sph_echo.h"
+
+#include "miner.h"
+}
+
+// aus cpu-miner.c
+extern int device_map[8];
+
+// Device memory for the input/output of the chained hash functions
+static uint32_t *d_hash[8];
+
+
+extern void qubit_luffa512_cpu_init(int thr_id, int threads);
+extern void qubit_luffa512_cpu_setBlock_80(void *pdata);
+extern void qubit_luffa512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
+
+extern void x11_cubehash512_cpu_init(int thr_id, int threads);
+extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x11_shavite512_cpu_init(int thr_id, int threads);
+extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x11_simd512_cpu_init(int thr_id, int threads);
+extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x11_echo512_cpu_init(int thr_id, int threads);
+extern void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void quark_check_cpu_init(int thr_id, int threads);
+extern void quark_check_cpu_setTarget(const void *ptarget);
+extern uint32_t quark_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
+
+extern void quark_compactTest_cpu_init(int thr_id, int threads);
+extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, 
+											uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse,
+											int order);
+
+// CPU reference for the qubit hash chain:
+// luffa512 -> cubehash512 -> shavite512 -> simd512 -> echo512.
+// Hashes the 80 bytes at `input` and copies the first 32 bytes of the final
+// 64-byte digest to `state`.
+inline void qubithash(void *state, const void *input)
+{
+    // 64-byte working digest, reused as input and output of each stage
+    uint32_t digest[16];
+
+    // stage 1: luffa512 over the 80-byte input
+    sph_luffa512_context luffa;
+    sph_luffa512_init(&luffa);
+    sph_luffa512(&luffa, input, 80);
+    sph_luffa512_close(&luffa, (void*) digest);
+
+    // stage 2: cubehash512
+    sph_cubehash512_context cubehash;
+    sph_cubehash512_init(&cubehash);
+    sph_cubehash512(&cubehash, (const void*) digest, 64);
+    sph_cubehash512_close(&cubehash, (void*) digest);
+
+    // stage 3: shavite512
+    sph_shavite512_context shavite;
+    sph_shavite512_init(&shavite);
+    sph_shavite512(&shavite, (const void*) digest, 64);
+    sph_shavite512_close(&shavite, (void*) digest);
+
+    // stage 4: simd512
+    sph_simd512_context simd;
+    sph_simd512_init(&simd);
+    sph_simd512(&simd, (const void*) digest, 64);
+    sph_simd512_close(&simd, (void*) digest);
+
+    // stage 5: echo512
+    sph_echo512_context echo;
+    sph_echo512_init(&echo);
+    sph_echo512(&echo, (const void*) digest, 64);
+    sph_echo512_close(&echo, (void*) digest);
+
+    // only the first 256 bits are handed back to the caller
+    memcpy(state, digest, 32);
+}
+
+
+extern bool opt_benchmark;
+
+/*
+ * Scans nonces on the GPU with the qubit chain (luffa512 -> cubehash512 ->
+ * shavite512 -> simd512 -> echo512), `throughput` nonces per pass, starting
+ * at pdata[19].
+ *
+ * thr_id       index into the per-thread tables (device_map, d_hash, work_restart)
+ * pdata        20-word block header; pdata[19] is the nonce and is updated in place
+ * ptarget      8-word target; word 7 is overwritten with 0x0000ff in benchmark mode
+ * max_nonce    scanning stops once pdata[19] has reached this value
+ * hashes_done  receives the nonce distance covered by this call
+ *
+ * Returns 1 with pdata[19] set to a nonce that also validated on the CPU,
+ * otherwise 0 once the range is exhausted or a work restart is requested.
+ */
+extern "C" int scanhash_qubit(int thr_id, uint32_t *pdata,
+    const uint32_t *ptarget, uint32_t max_nonce,
+    unsigned long *hashes_done)
+{
+	const uint32_t first_nonce = pdata[19];
+
+	if (opt_benchmark)
+		((uint32_t*)ptarget)[7] = 0x0000ff;
+
+	const uint32_t Htarg = ptarget[7];
+
+	const int throughput = 256*256*8;
+
+	static bool init[8] = {0,0,0,0,0,0,0,0};
+	if (!init[thr_id])
+	{
+		cudaSetDevice(device_map[thr_id]);
+
+		// one-time setup: 16 words (64 bytes) of hash state per nonce of a pass
+		cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
+		qubit_luffa512_cpu_init(thr_id, throughput);
+		x11_cubehash512_cpu_init(thr_id, throughput);
+		x11_shavite512_cpu_init(thr_id, throughput);
+		x11_simd512_cpu_init(thr_id, throughput);
+		x11_echo512_cpu_init(thr_id, throughput);
+
+		quark_check_cpu_init(thr_id, throughput);
+
+		init[thr_id] = true;
+	}
+
+	// header with every word passed through be32enc, as consumed by the hash code
+	uint32_t endiandata[20];
+	for (int k=0; k < 20; k++)
+		be32enc(&endiandata[k], ((uint32_t*)pdata)[k]);
+
+	qubit_luffa512_cpu_setBlock_80((void*)endiandata);
+	quark_check_cpu_setTarget(ptarget);
+
+	do {
+		int order = 0;
+
+		// first stage: luffa512 of the 80-byte header on the GPU
+		qubit_luffa512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
+
+		// cubehash512 over every hash of the pass (no nonce vector)
+		x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+
+		// shavite512 over every hash of the pass (no nonce vector)
+		x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+
+		// simd512 over every hash of the pass (no nonce vector)
+		x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+
+		// echo512 over every hash of the pass (no nonce vector)
+		x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+
+		// scan the results on the GPU for a hash that meets the target
+		uint32_t foundNonce = quark_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		if  (foundNonce != 0xffffffff)
+		{
+			// recompute the candidate on the CPU before reporting it
+			uint32_t vhash64[8];
+			be32enc(&endiandata[19], foundNonce);
+			qubithash(vhash64, endiandata);
+
+			if( (vhash64[7]<=Htarg) && fulltest(vhash64, ptarget) ) {
+
+				pdata[19] = foundNonce;
+				*hashes_done = foundNonce - first_nonce + 1;
+				return 1;
+			} else {
+				applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNonce);
+			}
+		}
+
+		// Advance to the next batch. The sum is formed in 64 bits so that a
+		// batch reaching past 0xffffffff ends the scan; a plain 32-bit add
+		// would wrap pdata[19] to a small value that passes the loop test
+		// again and restarts scanning from the low nonces.
+		const uint64_t next_nonce = (uint64_t)pdata[19] + (uint64_t)throughput;
+		if (next_nonce > 0xffffffffULL) {
+			pdata[19] = 0xffffffff;
+			break;
+		}
+		pdata[19] = (uint32_t)next_nonce;
+
+	} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
+
+	*hashes_done = pdata[19] - first_nonce + 1;
+	return 0;
+}
diff --git a/qubit/qubit_luffa512.cu b/qubit/qubit_luffa512.cu
new file mode 100644
index 0000000000..324f87df26
--- /dev/null
+++ b/qubit/qubit_luffa512.cu
@@ -0,0 +1,512 @@
+/*
+ * luffa_for_32.c
+ * Version 2.0 (Sep 15th 2009)
+ *
+ * Copyright (C) 2008-2009 Hitachi, Ltd. All rights reserved.
+ *
+ * Hitachi, Ltd. is the owner of this software and hereby grant
+ * the U.S. Government and any interested party the right to use
+ * this software for the purposes of the SHA-3 evaluation process,
+ * notwithstanding that this software is copyrighted.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <cuda.h>
+#include "cuda_runtime.h"
+#include "device_launch_parameters.h"
+
+
+#include <stdio.h>
+#include <stdint.h>
+#include <memory.h>
+// aus heavy.cu
+extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
+
+typedef unsigned char BitSequence;
+
+
+#include "cuda_helper.h"
+__constant__ uint64_t c_PaddedMessage80[16]; // padded message (80 bytes + padding)
+__constant__ uint32_t pTarget[8];
+uint32_t *d_lnounce[8];
+uint32_t *d_LNonce[8];
+
+typedef struct {
+    uint32_t buffer[8]; /* Buffer to be hashed */
+    uint32_t chainv[40];   /* Chaining values */
+} hashState;
+
+
+// Reverses the byte order of a 32-bit word.
+static __device__ __forceinline__ uint32_t BYTES_SWAP32(uint32_t x)
+{
+	// selector 0x0123: result byte 0 <- source byte 3, byte 1 <- byte 2,
+	// byte 2 <- byte 1, byte 3 <- byte 0
+	const uint32_t reversed = __byte_perm(x, x, 0x0123);
+	return reversed;
+}
+/*
+ * MULT2(a,j): works on the 8-word group a[8*j .. 8*j+7]. Every word moves up
+ * one index, the old top word (saved in `tmp`) wraps around to index 0 and is
+ * additionally XORed into the new words 1, 3 and 4.
+ * Requires a variable named `tmp` in the enclosing scope.
+ */
+#define MULT2(a,j)\
+    tmp = a[7+(8*j)];\
+    a[7+(8*j)] = a[6+(8*j)];\
+    a[6+(8*j)] = a[5+(8*j)];\
+    a[5+(8*j)] = a[4+(8*j)];\
+    a[4+(8*j)] = a[3+(8*j)] ^ tmp;\
+    a[3+(8*j)] = a[2+(8*j)] ^ tmp;\
+    a[2+(8*j)] = a[1+(8*j)];\
+    a[1+(8*j)] = a[0+(8*j)] ^ tmp;\
+    a[0+(8*j)] = tmp;
+/* TWEAK(a0..a3,j): rotates each of the four words left by j bits (32-bit words; used here with j = 1..4). */
+#define TWEAK(a0,a1,a2,a3,j)\
+    a0 = (a0<<(j))|(a0>>(32-j));\
+    a1 = (a1<<(j))|(a1>>(32-j));\
+    a2 = (a2<<(j))|(a2>>(32-j));\
+    a3 = (a3<<(j))|(a3>>(32-j));
+/*
+ * STEP(c0,c1): one step on the 8 words of a variable named `chainv` in the
+ * enclosing scope (also uses `tmp`): SUBCRUMB on words 0..3 and on words
+ * 5,6,7,4, MIXWORD on the pairs (k, k+4) for k = 0..3, then c0 is XORed into
+ * chainv[0] and c1 into chainv[4].
+ */
+#define STEP(c0,c1)\
+    SUBCRUMB(chainv[0],chainv[1],chainv[2],chainv[3],tmp);\
+    SUBCRUMB(chainv[5],chainv[6],chainv[7],chainv[4],tmp);\
+    MIXWORD(chainv[0],chainv[4]);\
+    MIXWORD(chainv[1],chainv[5]);\
+    MIXWORD(chainv[2],chainv[6]);\
+    MIXWORD(chainv[3],chainv[7]);\
+    ADD_CONSTANT(chainv[0],chainv[4],c0,c1);
+/*
+ * SUBCRUMB(a0..a3,a4): fixed sequence of OR/AND/XOR/NOT operations that
+ * transforms a0..a3 in place; a4 is scratch and its previous value is lost.
+ * NOTE(review): presumably the bitsliced Luffa SubCrumb S-box; confirm against the spec.
+ */
+#define SUBCRUMB(a0,a1,a2,a3,a4)\
+    a4  = a0;\
+    a0 |= a1;\
+    a2 ^= a3;\
+    a1  = ~a1;\
+    a0 ^= a3;\
+    a3 &= a4;\
+    a1 ^= a3;\
+    a3 ^= a2;\
+    a2 &= a0;\
+    a0  = ~a0;\
+    a2 ^= a1;\
+    a1 |= a3;\
+    a4 ^= a1;\
+    a3 ^= a2;\
+    a2 &= a1;\
+    a1 ^= a0;\
+    a0  = a4;
+/* MIXWORD(a0,a4): mixes the word pair in place with XORs and left-rotations by 2, 14, 10 and 1 bits. */
+#define MIXWORD(a0,a4)\
+    a4 ^= a0;\
+    a0  = (a0<<2) | (a0>>(30));\
+    a0 ^= a4;\
+    a4  = (a4<<14) | (a4>>(18));\
+    a4 ^= a0;\
+    a0  = (a0<<10) | (a0>>(22));\
+    a0 ^= a4;\
+    a4  = (a4<<1) | (a4>>(31));
+/* ADD_CONSTANT(a0,b0,c0,c1): XORs c0 into a0 and c1 into b0. */
+#define ADD_CONSTANT(a0,b0,c0,c1)\
+    a0 ^= c0;\
+    b0 ^= c1;
+
+/*
+ * Initial values of the chaining variables: 40 words, i.e. five groups of
+ * eight words. h2_IV is the host table; qubit_luffa512_cpu_init copies it
+ * into the constant-memory symbol c_IV, from which the kernels seed
+ * hashState.chainv.
+ */
+__constant__ uint32_t c_IV[40];
+const uint32_t h2_IV[40] = {
+    0x6d251e69,0x44b051e0,0x4eaa6fb4,0xdbf78465,
+    0x6e292011,0x90152df4,0xee058139,0xdef610bb,
+    0xc3b44b95,0xd9d2f256,0x70eee9a0,0xde099fa3,
+    0x5d9b0557,0x8fc944b3,0xcf1ccf0e,0x746cd581,
+    0xf7efc89d,0x5dba5781,0x04016ce5,0xad659c05,
+    0x0306194f,0x666d1836,0x24aa230a,0x8b264ae7,
+    0x858075d5,0x36d79cce,0xe571f7d7,0x204b1f67,
+    0x35870c6a,0x57e9e923,0x14bcb808,0x7cde72ce,
+    0x6c68e9be,0x5ec41e22,0xc825b7c7,0xaffb4363,
+    0xf5df3999,0x0fc688f1,0xb07224cc,0x03e86cea};
+/*
+ * Step constants: 80 words. rnd512 reads them as (c0,c1) pairs, 16 words
+ * (eight pairs, one per STEP) for each of the five chaining groups.
+ * h2_CNS is the host table; qubit_luffa512_cpu_init copies it into c_CNS.
+ */
+__constant__ uint32_t c_CNS[80];
+uint32_t h2_CNS[80] = {
+    0x303994a6,0xe0337818,0xc0e65299,0x441ba90d,
+    0x6cc33a12,0x7f34d442,0xdc56983e,0x9389217f,
+    0x1e00108f,0xe5a8bce6,0x7800423d,0x5274baf4,
+    0x8f5b7882,0x26889ba7,0x96e1db12,0x9a226e9d,
+    0xb6de10ed,0x01685f3d,0x70f47aae,0x05a17cf4,
+    0x0707a3d4,0xbd09caca,0x1c1e8f51,0xf4272b28,
+    0x707a3d45,0x144ae5cc,0xaeb28562,0xfaa7ae2b,
+    0xbaca1589,0x2e48f1c1,0x40a46f3e,0xb923c704,
+    0xfc20d9d2,0xe25e72c1,0x34552e25,0xe623bb72,
+    0x7ad8818f,0x5c58a4a4,0x8438764a,0x1e38e2e7,
+    0xbb6de032,0x78e38b9d,0xedb780c8,0x27586719,
+    0xd9847356,0x36eda57f,0xa2c78434,0x703aace7,
+    0xb213afa5,0xe028c9bf,0xc84ebe95,0x44756f91,
+    0x4e608a22,0x7e8fce32,0x56d858fe,0x956548be,
+    0x343b138f,0xfe191be2,0xd0ec4e3d,0x3cb226e5,
+    0x2ceb4882,0x5944a28e,0xb3ad2208,0xa1c4c355,
+    0xf0d2e9e3,0x5090d577,0xac11d7fa,0x2d1925ab,
+    0x1bcb66f2,0xb46496ac,0x6f2d9bc9,0xd1925ab0,
+    0x78602649,0x29131ab6,0x8edae952,0x0fc053c3,
+    0x3b6ba548,0x3f014f0c,0xedae9520,0xfc053c31};
+
+
+/*
+ * rnd512: one round over the hash state.
+ *
+ * state->chainv is treated as five groups of eight words, state->buffer as
+ * the 8-word message block. The round
+ *   1. mixes the five groups with each other (XOR folds combined with MULT2),
+ *   2. XORs the message block into every group, applying MULT2 to the block
+ *      after each group, so state->buffer is clobbered on return,
+ *   3. runs eight STEPs on each group with that group's constants from
+ *      c_CNS; groups 1..4 first get TWEAK on their words 4..7 with a
+ *      rotation equal to the group index.
+ */
+__device__ __forceinline__ void rnd512(hashState *state)
+{
+    int i,j;
+    uint32_t t[40]; // scratch: fold of the groups, later snapshots of all 40 words
+    uint32_t chainv[8]; // local copy of the group being stepped; the name is required by STEP
+    uint32_t tmp; // scratch required by MULT2 and STEP
+    // t[0..7] = XOR of the five groups, word by word
+#pragma unroll 8
+    for(i=0;i<8;i++) {
+        t[i]=0;
+#pragma unroll 5
+        for(j=0;j<5;j++) {
+            t[i] ^= state->chainv[i+8*j];
+        }
+    }
+
+    MULT2(t, 0);
+    // XOR the MULT2'd fold into every group
+#pragma unroll 5
+    for(j=0;j<5;j++) {
+#pragma unroll 8
+        for(i=0;i<8;i++) {
+            state->chainv[i+8*j] ^= t[i];
+        }
+    }
+    // snapshot all 40 words into t
+#pragma unroll 5
+    for(j=0;j<5;j++) {
+#pragma unroll 8
+        for(i=0;i<8;i++) {
+            t[i+8*j] = state->chainv[i+8*j];
+        }
+    }
+
+#pragma unroll 5
+    for(j=0;j<5;j++) {
+        MULT2(state->chainv, j);
+    }
+    // group j ^= snapshot of group (j+1) mod 5
+#pragma unroll 5
+    for(j=0;j<5;j++) {
+#pragma unroll 8
+        for(i=0;i<8;i++) {
+            state->chainv[8*j+i] ^= t[8*((j+1)%5)+i];
+        }
+    }
+    // second snapshot of all 40 words
+#pragma unroll 5
+    for(j=0;j<5;j++) {
+#pragma unroll 8
+        for(i=0;i<8;i++) {
+            t[i+8*j] = state->chainv[i+8*j];
+        }
+    }
+
+#pragma unroll 5
+    for(j=0;j<5;j++) {
+        MULT2(state->chainv, j);
+    }
+    // group j ^= snapshot of group (j+4) mod 5
+#pragma unroll 5
+    for(j=0;j<5;j++) {
+#pragma unroll 8
+        for(i=0;i<8;i++) {
+            state->chainv[8*j+i] ^= t[8*((j+4)%5)+i];
+        }
+    }
+    // XOR the message block into each group; the block is MULT2'd after every group
+#pragma unroll 5
+    for(j=0;j<5;j++) {
+#pragma unroll 8
+        for(i=0;i<8;i++) {
+            state->chainv[i+8*j] ^= state->buffer[i];
+        }
+        MULT2(state->buffer, 0);
+    }
+    // group 0: load into the local chainv (no TWEAK for this group)
+#pragma unroll 8
+    for(i=0;i<8;i++) {
+        chainv[i] = state->chainv[i];
+    }
+    // eight STEPs with constants c_CNS[0..15]
+#pragma unroll 8
+    for(i=0;i<8;i++) {
+        STEP(c_CNS[(2*i)],c_CNS[(2*i)+1]);
+    }
+    // store group 0, load group 1
+#pragma unroll 8
+    for(i=0;i<8;i++) {
+        state->chainv[i] = chainv[i];
+        chainv[i] = state->chainv[i+8];
+    }
+
+    TWEAK(chainv[4],chainv[5],chainv[6],chainv[7],1);
+    // eight STEPs with constants c_CNS[16..31]
+#pragma unroll 8
+    for(i=0;i<8;i++) {
+        STEP(c_CNS[(2*i)+16],c_CNS[(2*i)+16+1]);
+    }
+    // store group 1, load group 2
+#pragma unroll 8
+    for(i=0;i<8;i++) {
+        state->chainv[i+8] = chainv[i];
+        chainv[i] = state->chainv[i+16];
+    }
+
+    TWEAK(chainv[4],chainv[5],chainv[6],chainv[7],2);
+    // eight STEPs with constants c_CNS[32..47]
+#pragma unroll 8
+    for(i=0;i<8;i++) {
+        STEP(c_CNS[(2*i)+32],c_CNS[(2*i)+32+1]);
+    }
+    // store group 2, load group 3
+#pragma unroll 8
+    for(i=0;i<8;i++) {
+        state->chainv[i+16] = chainv[i];
+        chainv[i] = state->chainv[i+24];
+    }
+
+    TWEAK(chainv[4],chainv[5],chainv[6],chainv[7],3);
+    // eight STEPs with constants c_CNS[48..63]
+#pragma unroll 8
+    for(i=0;i<8;i++) {
+        STEP(c_CNS[(2*i)+48],c_CNS[(2*i)+48+1]);
+    }
+    // store group 3, load group 4
+#pragma unroll 8
+    for(i=0;i<8;i++) {
+        state->chainv[i+24] = chainv[i];
+        chainv[i] = state->chainv[i+32];
+    }
+
+    TWEAK(chainv[4],chainv[5],chainv[6],chainv[7],4);
+    // eight STEPs with constants c_CNS[64..79]
+#pragma unroll 8
+    for(i=0;i<8;i++) {
+        STEP(c_CNS[(2*i)+64],c_CNS[(2*i)+64+1]);
+    }
+    // store group 4
+#pragma unroll 8
+    for(i=0;i<8;i++) {
+        state->chainv[i+32] = chainv[i];
+    }
+}
+
+
+/*
+ * Absorbs an 80-byte message. The first 64 bytes are processed as two
+ * 32-byte blocks (byte-swapped word by word, one rnd512 each). The last
+ * 16 bytes are only staged in state->buffer[0..3]; no round is run for
+ * them here, and buffer[4..7] are left for the caller to fill.
+ */
+__device__ __forceinline__ void Update512(hashState *state, const BitSequence *data)
+{
+    uint32_t *words = (uint32_t*)data;
+
+    // two complete 8-word blocks
+#pragma unroll 2
+    for (int blk = 0; blk < 2; blk++) {
+#pragma unroll 8
+        for (int w = 0; w < 8; w++)
+            state->buffer[w] = BYTES_SWAP32(words[8*blk + w]);
+        rnd512(state);
+    }
+
+    // remaining four words (bytes 64..79)
+#pragma unroll 4
+    for (int w = 0; w < 4; w++)
+        state->buffer[w] = BYTES_SWAP32(words[16 + w]);
+}
+
+
+/***************************************************/
+/*
+ * Finishes the hash and writes 16 output words to `b`.
+ *
+ * Expects state->buffer[0..3] to hold the last four message words (as left
+ * by Update512). Words 4..7 are filled with 0x80000000 followed by zeros and
+ * one round is run. Each half of the output then comes from a round with an
+ * all-zero block: output word w is the byte-swapped XOR of word w of all
+ * five chaining groups.
+ */
+__device__ __forceinline__ void finalization512(hashState *state, uint32_t *b)
+{
+    // close the partially filled block
+    state->buffer[4] = 0x80000000;
+    state->buffer[5] = 0;
+    state->buffer[6] = 0;
+    state->buffer[7] = 0;
+    rnd512(state);
+
+    // half 0 -> b[0..7], half 1 -> b[8..15]
+#pragma unroll 2
+    for (int half = 0; half < 2; half++) {
+        // blank round with m = 0
+#pragma unroll 8
+        for (int w = 0; w < 8; w++) state->buffer[w] = 0;
+        rnd512(state);
+
+#pragma unroll 8
+        for (int w = 0; w < 8; w++) {
+            uint32_t folded = 0;
+#pragma unroll 5
+            for (int g = 0; g < 5; g++) {
+                folded ^= state->chainv[w + 8*g];
+            }
+            b[8*half + w] = BYTES_SWAP32(folded);
+        }
+    }
+}
+
+
+/***************************************************/
+// Luffa-512 of the 80-byte header held in c_PaddedMessage80, one nonce per thread.
+// Launched on a 1D grid: thread t hashes nonce startNounce + t and stores its
+// 16-word digest at outputHash + 16 * t words. Threads with t >= threads do nothing.
+__global__ void qubit_luffa512_gpu_hash_80(int threads, uint32_t startNounce, void *outputHash)
+{
+    const int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+    if (thread >= threads)
+        return;
+
+    const uint32_t nounce = startNounce + thread;
+
+    // private copy of the padded header, addressable as 64- or 32-bit words
+    union {
+        uint64_t buf64[16];
+        uint32_t buf32[32];
+    } msg;
+#pragma unroll 16
+    for (int i = 0; i < 16; ++i) msg.buf64[i] = c_PaddedMessage80[i];
+
+    // substitute this thread's nonce (byte-swapped) into 64-bit word 9
+    msg.buf64[9] = REPLACE_HIWORD(msg.buf64[9], cuda_swab32(nounce));
+
+    // start from the initial chaining values with an empty message buffer
+    hashState st;
+#pragma unroll 40
+    for (int i = 0; i < 40; i++) st.chainv[i] = c_IV[i];
+#pragma unroll 8
+    for (int i = 0; i < 8; i++) st.buffer[i] = 0;
+
+    Update512(&st, (BitSequence*)msg.buf32);
+    finalization512(&st, (uint32_t *)outputHash + 16 * thread);
+}
+
+/*
+ * Luffa-512 of the 80-byte header in c_PaddedMessage80 with the target test
+ * done on the device, one nonce per thread (1D grid, thread t handles nonce
+ * startNounce + t, threads with t >= threads do nothing).
+ *
+ * The first eight digest words are compared with pTarget starting at word 7;
+ * the first differing word decides and an exact match passes. Every passing
+ * thread lowers resNounce[0] to its nonce, so after the kernel resNounce[0]
+ * holds the smallest passing nonce, or its initial value if none passed.
+ * The update uses atomicMin: a plain read-compare-write would race between
+ * threads and could leave a larger nonce behind.
+ *
+ * outputHash is not written by this kernel.
+ */
+__global__ void qubit_luffa512_gpu_finalhash_80(int threads, uint32_t startNounce, void *outputHash, uint32_t *resNounce)
+{
+    int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+    if (thread < threads)
+    {
+        uint32_t nounce = startNounce + thread;
+
+        // private copy of the padded header, addressable as 64- or 32-bit words
+        union {
+            uint64_t buf64[16];
+            uint32_t buf32[32];
+        } buff;
+
+        uint32_t Hash[16];
+#pragma unroll 16
+        for (int i=0; i < 16; ++i) buff.buf64[i] = c_PaddedMessage80[i];
+
+        // substitute this thread's nonce (byte-swapped) into 64-bit word 9
+        buff.buf64[9] = REPLACE_HIWORD(buff.buf64[9], cuda_swab32(nounce));
+
+        hashState state;
+#pragma unroll 40
+        for(int i=0;i<40;i++) state.chainv[i] = c_IV[i];
+#pragma unroll 8
+        for(int i=0;i<8;i++) state.buffer[i] = 0;
+        Update512(&state, (BitSequence*)buff.buf32);
+        finalization512(&state, Hash);
+
+        // multi-word compare, word 7 first; equality on all words passes
+        bool rc = true;
+        for (int i = 7; i >= 0; i--) {
+            if (Hash[i] != pTarget[i]) {
+                rc = (Hash[i] < pTarget[i]);
+                break;
+            }
+        }
+
+        if (rc)
+            atomicMin(resNounce, nounce);
+    }
+}
+// Setup-Funktionen
+/*
+ * One-time setup for the Luffa-512 kernels of mining thread `thr_id`:
+ * uploads the IV and step-constant tables to constant memory and allocates
+ * the one-word device result slot (d_LNonce) and its pinned host mirror
+ * (d_lnounce; host memory despite the d_ prefix).
+ *
+ * `threads` is not used. Failures are reported on stderr; the function has
+ * no way to return them, so the caller continues as before.
+ */
+__host__ void qubit_luffa512_cpu_init(int thr_id, int threads)
+{
+	cudaError_t err;
+
+	err = cudaMemcpyToSymbol( c_IV, h2_IV, sizeof(h2_IV), 0, cudaMemcpyHostToDevice );
+	if (err != cudaSuccess)
+		fprintf(stderr, "GPU #%d: luffa512 init: c_IV upload failed: %s\n", thr_id, cudaGetErrorString(err));
+
+	err = cudaMemcpyToSymbol( c_CNS, h2_CNS, sizeof(h2_CNS), 0, cudaMemcpyHostToDevice );
+	if (err != cudaSuccess)
+		fprintf(stderr, "GPU #%d: luffa512 init: c_CNS upload failed: %s\n", thr_id, cudaGetErrorString(err));
+
+	err = cudaMalloc(&d_LNonce[thr_id], sizeof(uint32_t));
+	if (err != cudaSuccess)
+		fprintf(stderr, "GPU #%d: luffa512 init: device nonce allocation failed: %s\n", thr_id, cudaGetErrorString(err));
+
+	err = cudaMallocHost(&d_lnounce[thr_id], 1*sizeof(uint32_t));
+	if (err != cudaSuccess)
+		fprintf(stderr, "GPU #%d: luffa512 init: pinned host nonce allocation failed: %s\n", thr_id, cudaGetErrorString(err));
+}
+
+/*
+ * Runs qubit_luffa512_gpu_finalhash_80 over `threads` nonces starting at
+ * startNounce and returns the nonce the kernel left in the device result
+ * slot, or 0xffffffff when no nonce met the target.
+ *
+ * 0xffffffff is also returned when resetting or reading back the result slot
+ * fails, so a stale value from an earlier pass is never reported.
+ * d_outputHash is forwarded to the kernel, which does not write to it.
+ */
+__host__ uint32_t qubit_luffa512_cpu_finalhash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_outputHash,int order)
+{
+	const uint32_t notFound = 0xffffffff;
+
+	// cudaMemset fills bytes: 0xff in each of the four bytes gives 0xffffffff
+	if (cudaMemset(d_LNonce[thr_id], 0xff, sizeof(uint32_t)) != cudaSuccess)
+		return notFound;
+
+	const int threadsperblock = 256;
+
+	// number of thread blocks needed to cover `threads` (rounded up)
+	dim3 grid((threads + threadsperblock-1)/threadsperblock);
+	dim3 block(threadsperblock);
+
+	// size of the dynamic shared memory region
+	size_t shared_size = 0;
+
+	qubit_luffa512_gpu_finalhash_80<<<grid, block, shared_size>>>(threads, startNounce, d_outputHash, d_LNonce[thr_id]);
+	MyStreamSynchronize(NULL, order, thr_id);
+
+	// d_lnounce is the pinned host mirror of the device result slot
+	if (cudaMemcpy(d_lnounce[thr_id], d_LNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost) != cudaSuccess)
+		return notFound;
+
+	return *d_lnounce[thr_id];
+}
+
+// Host launcher for qubit_luffa512_gpu_hash_80: covers `threads` nonces
+// starting at startNounce with 256-thread blocks, no dynamic shared memory,
+// then waits through MyStreamSynchronize.
+__host__ void qubit_luffa512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_outputHash,int order)
+{
+    const int tpb = 256;
+    // round up so that a partial last block is still launched
+    const int blocks = (threads + tpb - 1) / tpb;
+
+    qubit_luffa512_gpu_hash_80<<<dim3(blocks), dim3(tpb), 0>>>(threads, startNounce, d_outputHash);
+    MyStreamSynchronize(NULL, order, thr_id);
+}
+
+// Uploads the block header to c_PaddedMessage80 as a 128-byte buffer: the
+// 80 bytes at `pdata`, then fixed trailer bytes (0x80 at offset 80, 1 at 111,
+// 0x02 at 126, 0x80 at 127) and zeros elsewhere. The kernels substitute the
+// nonce per thread, and Update512 consumes only the first 80 bytes.
+__host__ void qubit_luffa512_cpu_setBlock_80(void *pdata)
+{
+	unsigned char padded[128] = { 0 };
+
+	memcpy(padded, pdata, 80);
+	padded[80]  = 0x80;
+	padded[111] = 1;
+	padded[126] = 0x02;
+	padded[127] = 0x80;
+
+	// hand the message to the GPU
+	cudaMemcpyToSymbol( c_PaddedMessage80, padded, sizeof(padded), 0, cudaMemcpyHostToDevice);
+}
+
+// Prepares a pass of the final-hash kernel: uploads the 8-word target to
+// pTarget, then the padded 80-byte header to c_PaddedMessage80 in the same
+// layout qubit_luffa512_cpu_setBlock_80 produces.
+__host__ void qubit_luffa512_cpufinal_setBlock_80(void *pdata, const void *ptarget)
+{
+	// target first, as before
+	cudaMemcpyToSymbol( pTarget, ptarget, 8*sizeof(uint32_t), 0, cudaMemcpyHostToDevice);
+
+	// header padding and upload are shared with the non-final variant
+	qubit_luffa512_cpu_setBlock_80(pdata);
+}
\ No newline at end of file
diff --git a/sph/Lyra2.c b/sph/Lyra2.c
new file mode 100644
index 0000000000..309dc6171d
--- /dev/null
+++ b/sph/Lyra2.c
@@ -0,0 +1,215 @@
+/**
+ * Implementation of the Lyra2 Password Hashing Scheme (PHS).
+ *
+ * Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014.
+ *
+ * This software is hereby placed in the public domain.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include "Lyra2.h"
+#include "Sponge.h"
+
+/**
+ * Executes Lyra2 based on the G function from Blake2b. This version supports salts and passwords
+ * whose combined length is smaller than the size of the memory matrix, (i.e., (nRows x nCols x b) bits,
+ * where "b" is the underlying sponge's bitrate). In this implementation, the "basil" is composed by all
+ * integer parameters (treated as type "unsigned int") in the order they are provided, plus the value
+ * of nCols, (i.e., basil = kLen || pwdlen || saltlen || timeCost || nRows || nCols).
+ *
+ * @param K The derived key to be output by the algorithm
+ * @param kLen Desired key length
+ * @param pwd User password
+ * @param pwdlen Password length
+ * @param salt Salt
+ * @param saltlen Salt length
+ * @param timeCost Parameter to determine the processing time (T)
+ * @param nRows Number or rows of the memory matrix (R)
+ * @param nCols Number of columns of the memory matrix (C)
+ *
+ * @return 0 if the key is generated correctly; -1 if there is an error (usually due to lack of memory for allocation)
+ */
+int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *salt, uint64_t saltlen, uint64_t timeCost, uint64_t nRows, uint64_t nCols) {
+
+    //============================= Basic variables ============================//
+    int64_t row = 2; //index of row to be processed
+    int64_t prev = 1; //index of prev (last row ever computed/modified)
+    int64_t rowa = 0; //index of row* (a previous row, deterministically picked during Setup and randomly picked while Wandering)
+    int64_t tau; //Time Loop iterator
+    int64_t step = 1; //Visitation step (used during Setup and Wandering phases)
+    int64_t window = 2; //Visitation window (used to define which rows can be revisited during Setup)
+    int64_t gap = 1; //Modifier to the step, assuming the values 1 or -1
+    int64_t i; //auxiliary iteration counter
+    //==========================================================================/
+    if (nRows < 3) return -1; //Setup unconditionally touches rows 0, 1 and 2: a smaller matrix would be accessed out of bounds
+    //========== Initializing the Memory Matrix and pointers to it =============//
+    //Tries to allocate enough space for the whole memory matrix
+    i = (int64_t) ((int64_t) nRows * (int64_t) ROW_LEN_BYTES);
+    uint64_t *wholeMatrix = malloc(i);
+    if (wholeMatrix == NULL) {
+      return -1;
+    }
+    memset(wholeMatrix, 0, i);
+
+    //Allocates pointers to each row of the matrix
+    uint64_t **memMatrix = malloc(nRows * sizeof (uint64_t*));
+    if (memMatrix == NULL) {
+      free(wholeMatrix); //the matrix allocated above must not leak on this error path
+      return -1;
+    }
+    //Places the pointers in the correct positions
+    uint64_t *ptrWord = wholeMatrix;
+    for (i = 0; i < nRows; i++) {
+      memMatrix[i] = ptrWord;
+      ptrWord += ROW_LEN_INT64;
+    }
+    //==========================================================================/
+
+    //============= Getting the password + salt + basil padded with 10*1 ===============//
+    //OBS.:The memory matrix will temporarily hold the password: not for saving memory,
+    //but this ensures that the password copied locally will be overwritten as soon as possible
+
+    //First, we clean enough blocks for the password, salt, basil and padding
+    uint64_t nBlocksInput = ((saltlen + pwdlen + 6 * sizeof (uint64_t)) / BLOCK_LEN_BLAKE2_SAFE_BYTES) + 1;
+
+    byte *ptrByte = (byte*) wholeMatrix;
+    memset(ptrByte, 0, nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES);
+
+    //Prepends the password
+    memcpy(ptrByte, pwd, pwdlen);
+    ptrByte += pwdlen;
+
+    //Concatenates the salt
+    memcpy(ptrByte, salt, saltlen);
+    ptrByte += saltlen;
+
+    //Concatenates the basil: every integer passed as parameter, in the order they are provided by the interface
+    memcpy(ptrByte, &kLen, sizeof (uint64_t));
+    ptrByte += sizeof (uint64_t);
+    memcpy(ptrByte, &pwdlen, sizeof (uint64_t));
+    ptrByte += sizeof (uint64_t);
+    memcpy(ptrByte, &saltlen, sizeof (uint64_t));
+    ptrByte += sizeof (uint64_t);
+    memcpy(ptrByte, &timeCost, sizeof (uint64_t));
+    ptrByte += sizeof (uint64_t);
+    memcpy(ptrByte, &nRows, sizeof (uint64_t));
+    ptrByte += sizeof (uint64_t);
+    memcpy(ptrByte, &nCols, sizeof (uint64_t));
+    ptrByte += sizeof (uint64_t);
+
+    //Now comes the padding
+    *ptrByte = 0x80; //first byte of padding: right after pwd || salt || basil
+    ptrByte = (byte*) wholeMatrix; //resets the pointer to the start of the memory matrix
+    ptrByte += nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES - 1; //sets the pointer to the correct position: end of incomplete block
+    *ptrByte ^= 0x01; //last byte of padding: at the end of the last incomplete block
+    //==========================================================================/
+
+    //======================= Initializing the Sponge State ====================//
+    //Sponge state: 16 uint64_t, BLOCK_LEN_INT64 words of them for the bitrate (b) and the remainder for the capacity (c)
+    uint64_t *state = malloc(16 * sizeof (uint64_t));
+    if (state == NULL) {
+      free(memMatrix); //both buffers allocated above must not leak on this error path
+      free(wholeMatrix);
+      return -1;
+    }
+    initState(state);
+    //==========================================================================/
+
+    //================================ Setup Phase =============================//
+    //Absorbing salt, password and basil: this is the only place in which the block length is hard-coded to 512 bits
+    ptrWord = wholeMatrix;
+    for (i = 0; i < nBlocksInput; i++) {
+      absorbBlockBlake2Safe(state, ptrWord); //absorbs each block of pad(pwd || salt || basil)
+      ptrWord += BLOCK_LEN_BLAKE2_SAFE_BYTES; //NOTE(review): ptrWord is a uint64_t*, so this advances 64 words (512 bytes), not one 64-byte block; kept as is because changing it would alter the derived key whenever nBlocksInput > 1 -- confirm intent
+    }
+
+    //Initializes M[0] and M[1]
+    reducedSqueezeRow0(state, memMatrix[0]); //The locally copied password is most likely overwritten here
+
+    reducedDuplexRow1(state, memMatrix[0], memMatrix[1]);
+
+    do {
+      //M[row] = rand; //M[row*] = M[row*] XOR rotW(rand)
+      //(the call reads M[prev] and M[row*], writes M[row] and updates M[row*])
+      reducedDuplexRowSetup(state, memMatrix[prev], memMatrix[rowa], memMatrix[row]);
+
+      //updates the value of row* (deterministically picked during Setup)
+      //window starts at 2 and is only ever doubled, so it stays a power of two and
+      //the mask below keeps row* inside the current window
+      rowa = (rowa + step) & (window - 1);
+      //update prev: it now points to the last row ever computed
+      prev = row;
+      //updates row: goes to the next row to be computed
+      row++;
+
+      //Checks if all rows in the window were visited.
+      if (rowa == 0) {
+        step = window + gap; //changes the step: approximately doubles its value
+        window *= 2; //doubles the size of the re-visitation window
+        gap = -gap; //inverts the modifier to the step
+      }
+
+    } while (row < nRows);
+    //==========================================================================/
+
+    //============================ Wandering Phase =============================//
+    row = 0; //Resets the visitation to the first row of the memory matrix
+    for (tau = 1; tau <= timeCost; tau++) {
+      //Step is approximately half the number of all rows of the memory matrix for an odd tau; otherwise, it is -1
+      step = (tau % 2 == 0) ? -1 : nRows / 2 - 1;
+      do {
+        //Selects a pseudorandom index row*
+        //------------------------------------------------------------------------------------------
+        //rowa = ((unsigned int)state[0]) & (nRows-1);	//(USE THIS IF nRows IS A POWER OF 2)
+        rowa = ((uint64_t) (state[0])) % nRows; //(USE THIS FOR THE "GENERIC" CASE)
+        //------------------------------------------------------------------------------------------
+
+        //Performs a reduced-round duplexing operation over M[row*] [+] M[prev], updating both M[row*] and M[row]
+        reducedDuplexRow(state, memMatrix[prev], memMatrix[rowa], memMatrix[row]);
+
+        //update prev: it now points to the last row ever computed
+        prev = row;
+
+        //updates row: goes to the next row to be computed
+        //------------------------------------------------------------------------------------------
+        //row = (row + step) & (nRows-1);	//(USE THIS IF nRows IS A POWER OF 2)
+        row = (row + step) % nRows; //(USE THIS FOR THE "GENERIC" CASE)
+        //NOTE(review): row + step is converted to uint64_t before the modulo, so with step == -1 and row == 0 it yields nRows-1 only when nRows is a power of two
+
+      } while (row != 0);
+    }
+    //==========================================================================/
+
+    //============================ Wrap-up Phase ===============================//
+    //Absorbs the first block of M[row*], the last row* picked while Wandering
+    absorbBlock(state, memMatrix[rowa]);
+
+    //Squeezes the key
+    squeeze(state, K, kLen);
+    //==========================================================================/
+
+    //========================= Freeing the memory =============================//
+    free(memMatrix);
+    free(wholeMatrix);
+
+    //Wiping out the sponge's internal state before freeing it
+    memset(state, 0, 16 * sizeof (uint64_t));
+    free(state);
+    //==========================================================================/
+
+    return 0;
+}
diff --git a/sph/Lyra2.h b/sph/Lyra2.h
new file mode 100644
index 0000000000..13c7dbd3b3
--- /dev/null
+++ b/sph/Lyra2.h
@@ -0,0 +1,50 @@
+/**
+ * Header file for the Lyra2 Password Hashing Scheme (PHS).
+ * 
+ * Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014.
+ * 
+ * This software is hereby placed in the public domain.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef LYRA2_H_
+#define LYRA2_H_
+
+#include <stdint.h>
+
+typedef unsigned char byte;     //raw byte type used for the key and input buffers
+
+//Block length required so Blake2's Initialization Vector (IV) is not overwritten (THIS SHOULD NOT BE MODIFIED)
+#define BLOCK_LEN_BLAKE2_SAFE_INT64 8                                   //512 bits (=64 bytes, =8 uint64_t)
+#define BLOCK_LEN_BLAKE2_SAFE_BYTES (BLOCK_LEN_BLAKE2_SAFE_INT64 * 8)   //same as above, in bytes
+
+//NOTE(review): Sponge.c spells the 12-word block out literally, so overriding BLOCK_LEN_BITS presumably needs matching changes there -- confirm
+#ifdef BLOCK_LEN_BITS
+        #define BLOCK_LEN_INT64 (BLOCK_LEN_BITS/64)      //Block length in uint64_t words, derived from the predefined BLOCK_LEN_BITS
+        #define BLOCK_LEN_BYTES (BLOCK_LEN_BITS/8)       //Block length, in bytes
+#else   //default block length: 768 bits
+        #define BLOCK_LEN_INT64 12                       //Block length: 768 bits (=96 bytes, =12 uint64_t)
+        #define BLOCK_LEN_BYTES (BLOCK_LEN_INT64 * 8)    //Block length, in bytes
+#endif
+
+#ifndef N_COLS
+        #define N_COLS 8                                //Number of columns in the memory matrix: 8 unless N_COLS is already defined
+#endif
+
+#define ROW_LEN_INT64 (BLOCK_LEN_INT64 * N_COLS) //Total length of a row, in uint64_t words: N_COLS blocks
+#define ROW_LEN_BYTES (ROW_LEN_INT64 * 8)        //Number of bytes per row
+
+//Derives a kLen-byte key K from pwd and salt; returns 0 on success, -1 on error. NOTE(review): Lyra2.c only mixes nCols into the absorbed input, row sizes come from N_COLS -- confirm callers pass nCols == N_COLS
+int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *salt, uint64_t saltlen, uint64_t timeCost, uint64_t nRows, uint64_t nCols);
+
+#endif /* LYRA2_H_ */
diff --git a/sph/Sponge.c b/sph/Sponge.c
new file mode 100644
index 0000000000..efe4d19960
--- /dev/null
+++ b/sph/Sponge.c
@@ -0,0 +1,755 @@
+/**
+ * A simple implementation of Blake2b's internal permutation 
+ * in the form of a sponge.
+ * 
+ * Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014.
+ * 
+ * This software is hereby placed in the public domain.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <string.h>
+#include <stdio.h>
+#include <time.h>
+#include "Sponge.h"
+#include "Lyra2.h"
+
+
+
+/**
+ * Initializes the Sponge State. The first 512 bits are set to zeros and the remainder
+ * receives Blake2b's IV as per Blake2b's specification. <b>Note:</b> Even though sponges
+ * typically have their internal state initialized with zeros, Blake2b's G function
+ * has a fixed point: if the internal state and message are both filled with zeros, the
+ * resulting permutation will always be a block filled with zeros; this happens because
+ * Blake2b does not use, inside its G function, the constants originally employed in BLAKE,
+ * relying on the IV for avoiding possible fixed points.
+ *
+ * @param state         The 1024-bit array (16 uint64_t words) to be initialized
+ */
+ void initState(uint64_t state[/*16*/]) {
+    //First 512 bits (64 bytes, i.e. words 0..7) are zeros
+    memset(state, 0, 64); 
+    //Remaining BLOCK_LEN_BLAKE2_SAFE_BYTES (words 8..15) are reserved to the IV
+    //blake2b_IV is not defined in this file (presumably it comes from Sponge.h -- TODO confirm)
+    state[8] = blake2b_IV[0];
+    state[9] = blake2b_IV[1];
+    state[10] = blake2b_IV[2];
+    state[11] = blake2b_IV[3];
+    state[12] = blake2b_IV[4];
+    state[13] = blake2b_IV[5];
+    state[14] = blake2b_IV[6];
+    state[15] = blake2b_IV[7];
+
+}
+
+/**
+ * Execute Blake2b's G function, with all 12 rounds (ROUND_LYRA(0) through ROUND_LYRA(11), in order).
+ * ROUND_LYRA is a macro defined outside this file; it presumably operates on the local name "v" -- TODO confirm.
+ * @param v     A 1024-bit (16 uint64_t) array to be processed by Blake2b's G function
+ */
+__inline static void blake2bLyra(uint64_t *v) {
+    ROUND_LYRA(0);
+    ROUND_LYRA(1);
+    ROUND_LYRA(2);
+    ROUND_LYRA(3);
+    ROUND_LYRA(4);
+    ROUND_LYRA(5);
+    ROUND_LYRA(6);
+    ROUND_LYRA(7);
+    ROUND_LYRA(8);
+    ROUND_LYRA(9);
+    ROUND_LYRA(10);
+    ROUND_LYRA(11);
+}
+
+/**
+ * Executes a reduced version of Blake2b's G function with only one round (the one with index 0)
+ * @param v     A 1024-bit (16 uint64_t) array to be processed by Blake2b's G function
+ */
+__inline static void reducedBlake2bLyra(uint64_t *v) {
+    ROUND_LYRA(0); //same macro invocation as the first round of the full-round variant
+}
+
+/**
+ * Squeezes "len" bytes out of the sponge into "out". Output is produced in chunks of
+ * BLOCK_LEN_BYTES taken from the start of the state; the full-round permutation
+ * (blake2bLyra) is applied after every complete chunk, but not after a trailing partial one.
+ * @param state      The current state of the sponge (modified when len >= BLOCK_LEN_BYTES)
+ * @param out        Array that will receive the data squeezed
+ * @param len        The number of bytes to be squeezed into the "out" array
+ */
+ void squeeze(uint64_t *state, byte *out, unsigned int len) {
+    int blocksLeft = len / BLOCK_LEN_BYTES;   //number of whole blocks still to emit
+    byte *cursor = out;                       //next write position inside "out"
+    //Each whole block is copied out first, then the permutation refreshes the state
+    while (blocksLeft > 0) {
+	memcpy(cursor, state, BLOCK_LEN_BYTES);
+	blake2bLyra(state);
+	cursor += BLOCK_LEN_BYTES;
+	blocksLeft--;
+    }
+
+    //The tail (fewer than BLOCK_LEN_BYTES bytes, possibly none) comes from the current state
+    memcpy(cursor, state, (len % BLOCK_LEN_BYTES));
+}
+
+/**
+ * Performs an absorb operation for a single block, using Blake2b's G function
+ * (all rounds, via blake2bLyra) as the internal permutation.
+ *
+ * The 12 leading 64-bit words of "in" are XORed, word by word, into the 12
+ * leading words of the state; the 4 remaining state words are left alone by the
+ * XOR step and only change through the permutation that follows.
+ *
+ * The word count is the literal 12, i.e. the default value of BLOCK_LEN_INT64;
+ * the macro itself is not consulted by this function.
+ *
+ * @param state The current state of the sponge (updated in place)
+ * @param in    The block to be absorbed (12 words are read)
+ */
+void absorbBlock(uint64_t *state, const uint64_t *in) {
+    int w; //index of the 64-bit word being absorbed
+
+    //XORs in[0..11] into state[0..11]
+    for (w = 0; w < 12; w++) {
+	state[w] ^= in[w];
+    }
+
+    //Applies the transformation f to the sponge's state
+    blake2bLyra(state);
+}
+
+/**
+ * Performs an absorb operation for a single "Blake2-safe" block, using Blake2b's
+ * G function (all rounds, via blake2bLyra) as the internal permutation.
+ *
+ * Only the 8 leading 64-bit words of "in" (the value of BLOCK_LEN_BLAKE2_SAFE_INT64)
+ * are XORed into the state, so words 8..15 are not overwritten by the input.
+ *
+ * @param state The current state of the sponge (updated in place)
+ * @param in    The block to be absorbed (8 words are read)
+ */
+void absorbBlockBlake2Safe(uint64_t *state, const uint64_t *in) {
+    int w; //index of the 64-bit word being absorbed
+
+    //XORs in[0..7] into state[0..7]
+    for (w = 0; w < 8; w++) {
+	state[w] ^= in[w];
+    }
+
+    //Applies the transformation f to the sponge's state
+    blake2bLyra(state);
+	/*
+	for (int i = 0; i<16; i++) {
+		printf(" final state %d %08x %08x in %08x %08x\n", i, (uint32_t)(state[i] & 0xFFFFFFFFULL), (uint32_t)(state[i] >> 32),
+			(uint32_t)(in[i] & 0xFFFFFFFFULL), (uint32_t)(in[i] >> 32));
+	}
+*/
+}
+
+/**
+ * Performs a reduced squeeze operation for a single row, from the highest to
+ * the lowest index, using the reduced-round Blake2b's G function as the
+ * internal permutation. N_COLS blocks of 12 words each are written, and the
+ * reduced permutation is applied once after every block.
+ * @param state     The current state of the sponge (words 0..11 are copied out, then permuted)
+ * @param rowOut    Row to receive the data squeezed
+ */
+void reducedSqueezeRow0(uint64_t* state, uint64_t* rowOut) {
+    uint64_t* ptrWord = rowOut + (N_COLS-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to M[0][C-1], i.e. the last block of the row
+    int i;
+    //M[row][C-1-col] = H.reduced_squeeze()
+    for (i = 0; i < N_COLS; i++) {
+	//Copies the 12 leading state words into the current block
+	ptrWord[0] = state[0];
+	ptrWord[1] = state[1];
+	ptrWord[2] = state[2];
+	ptrWord[3] = state[3];
+	ptrWord[4] = state[4];
+	ptrWord[5] = state[5];
+	ptrWord[6] = state[6];
+	ptrWord[7] = state[7];
+	ptrWord[8] = state[8];
+	ptrWord[9] = state[9];
+	ptrWord[10] = state[10];
+	ptrWord[11] = state[11];
+	/* Disabled debug dump of the block just written:
+for (int i = 0; i<12; i++) {
+		printf(" after reducedSqueezeRow0 %d %08x %08x in %08x %08x\n", i, (uint32_t)(ptrWord[i] & 0xFFFFFFFFULL), (uint32_t)(ptrWord[i] >> 32),
+			(uint32_t)(state[i] & 0xFFFFFFFFULL), (uint32_t)(state[i] >> 32));
+	}
+*/
+	//Goes to next block (column) that will receive the squeezed data: one block towards the start of the row
+	ptrWord -= BLOCK_LEN_INT64;
+	
+	//Applies the reduced-round transformation f to the sponge's state
+	reducedBlake2bLyra(state);
+    }
+}
+
+/**
+ * Performs a reduced duplex operation for a single row, from the highest to
+ * the lowest index, using the reduced-round Blake2b's G function as the
+ * internal permutation. rowIn is read block by block from its first block onwards,
+ * while rowOut is written from its last block (N_COLS-1) backwards.
+ * @param state		The current state of the sponge (words 0..11 absorb the input)
+ * @param rowIn		Row to feed the sponge
+ * @param rowOut	Row to receive the sponge's output
+ */
+ void reducedDuplexRow1(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut) {
+    uint64_t* ptrWordIn = rowIn;				//In Lyra2: pointer to prev
+    uint64_t* ptrWordOut = rowOut + (N_COLS-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to row (starts at its last block)
+    int i;
+
+    for (i = 0; i < N_COLS; i++) {
+
+	//Absorbing "M[prev][col]": the 12 words of the input block are XORed into state[0..11]
+	state[0]  ^= (ptrWordIn[0]);
+	state[1]  ^= (ptrWordIn[1]);
+	state[2]  ^= (ptrWordIn[2]);
+	state[3]  ^= (ptrWordIn[3]);
+	state[4]  ^= (ptrWordIn[4]);
+	state[5]  ^= (ptrWordIn[5]);
+	state[6]  ^= (ptrWordIn[6]);
+	state[7]  ^= (ptrWordIn[7]);
+	state[8]  ^= (ptrWordIn[8]);
+	state[9]  ^= (ptrWordIn[9]);
+	state[10] ^= (ptrWordIn[10]);
+	state[11] ^= (ptrWordIn[11]);
+
+	//Applies the reduced-round transformation f to the sponge's state
+	reducedBlake2bLyra(state);
+
+	//M[row][C-1-col] = M[prev][col] XOR rand (rand = state[0..11] after the permutation)
+	ptrWordOut[0] = ptrWordIn[0]  ^ state[0];
+	ptrWordOut[1] = ptrWordIn[1]  ^ state[1];
+	ptrWordOut[2] = ptrWordIn[2]  ^ state[2];
+	ptrWordOut[3] = ptrWordIn[3]  ^ state[3];
+	ptrWordOut[4] = ptrWordIn[4]  ^ state[4];
+	ptrWordOut[5] = ptrWordIn[5]  ^ state[5];
+	ptrWordOut[6] = ptrWordIn[6]  ^ state[6];
+	ptrWordOut[7] = ptrWordIn[7]  ^ state[7];
+	ptrWordOut[8] = ptrWordIn[8]  ^ state[8];
+	ptrWordOut[9] = ptrWordIn[9]  ^ state[9];
+	ptrWordOut[10] = ptrWordIn[10] ^ state[10];
+	ptrWordOut[11] = ptrWordIn[11] ^ state[11];
+	
+	
+	//Input: next column (i.e., next block in sequence)
+	ptrWordIn += BLOCK_LEN_INT64;
+	//Output: goes to previous column
+	ptrWordOut -= BLOCK_LEN_INT64;
+    }
+}
+
+/**
+ * Performs a duplexing operation over "M[rowInOut][col] [+] M[rowIn][col]" (i.e.,
+ * the wordwise addition of two columns, ignoring carries between words). The
+ * output of this operation, "rand", is then used to make
+ * "M[rowOut][(N_COLS-1)-col] = M[rowIn][col] XOR rand" and
+ * "M[rowInOut][col] =  M[rowInOut][col] XOR rotW(rand)", where rotW rotates the
+ * 12-word block by one 64-bit word (word j receives rand word j-1, word 0 receives
+ * rand word 11) and N_COLS is a system parameter.
+ * @param state          The current state of the sponge 
+ * @param rowIn          Row used only as input
+ * @param rowInOut       Row used as input and to receive output after rotation
+ * @param rowOut         Row receiving the output
+ *
+ */
+ void reducedDuplexRowSetup(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) {
+    uint64_t* ptrWordIn = rowIn;				//In Lyra2: pointer to prev
+    uint64_t* ptrWordInOut = rowInOut;				//In Lyra2: pointer to row*
+    uint64_t* ptrWordOut = rowOut + (N_COLS-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to row (starts at its last block)
+    int i;
+    for (i = 0; i < N_COLS; i++) {
+	//Absorbing "M[prev] [+] M[row*]" (wordwise 64-bit sums XORed into state[0..11])
+	state[0]  ^= (ptrWordIn[0]  + ptrWordInOut[0]);
+	state[1]  ^= (ptrWordIn[1]  + ptrWordInOut[1]);
+	state[2]  ^= (ptrWordIn[2]  + ptrWordInOut[2]);
+	state[3]  ^= (ptrWordIn[3]  + ptrWordInOut[3]);
+	state[4]  ^= (ptrWordIn[4]  + ptrWordInOut[4]);
+	state[5]  ^= (ptrWordIn[5]  + ptrWordInOut[5]);
+	state[6]  ^= (ptrWordIn[6]  + ptrWordInOut[6]);
+	state[7]  ^= (ptrWordIn[7]  + ptrWordInOut[7]);
+	state[8]  ^= (ptrWordIn[8]  + ptrWordInOut[8]);
+	state[9]  ^= (ptrWordIn[9]  + ptrWordInOut[9]);
+	state[10] ^= (ptrWordIn[10] + ptrWordInOut[10]);
+	state[11] ^= (ptrWordIn[11] + ptrWordInOut[11]);
+
+	//Applies the reduced-round transformation f to the sponge's state
+	reducedBlake2bLyra(state);
+
+	//M[row][C-1-col] = M[prev][col] XOR rand (the output pointer walks backwards)
+	ptrWordOut[0] = ptrWordIn[0]  ^ state[0];
+	ptrWordOut[1] = ptrWordIn[1]  ^ state[1];
+	ptrWordOut[2] = ptrWordIn[2]  ^ state[2];
+	ptrWordOut[3] = ptrWordIn[3]  ^ state[3];
+	ptrWordOut[4] = ptrWordIn[4]  ^ state[4];
+	ptrWordOut[5] = ptrWordIn[5]  ^ state[5];
+	ptrWordOut[6] = ptrWordIn[6]  ^ state[6];
+	ptrWordOut[7] = ptrWordIn[7]  ^ state[7];
+	ptrWordOut[8] = ptrWordIn[8]  ^ state[8];
+	ptrWordOut[9] = ptrWordIn[9]  ^ state[9];
+	ptrWordOut[10] = ptrWordIn[10] ^ state[10];
+	ptrWordOut[11] = ptrWordIn[11] ^ state[11];
+	
+	//M[row*][col] = M[row*][col] XOR rotW(rand): word j is XORed with rand word j-1 (word 0 with word 11)
+	ptrWordInOut[0]  ^= state[11];
+	ptrWordInOut[1]  ^= state[0];
+	ptrWordInOut[2]  ^= state[1];
+	ptrWordInOut[3]  ^= state[2];
+	ptrWordInOut[4]  ^= state[3];
+	ptrWordInOut[5]  ^= state[4];
+	ptrWordInOut[6]  ^= state[5];
+	ptrWordInOut[7]  ^= state[6];
+	ptrWordInOut[8]  ^= state[7];
+	ptrWordInOut[9]  ^= state[8];
+	ptrWordInOut[10] ^= state[9];
+	ptrWordInOut[11] ^= state[10];
+
+	//Inputs: next column (i.e., next block in sequence)
+	ptrWordInOut += BLOCK_LEN_INT64;
+	ptrWordIn += BLOCK_LEN_INT64;
+	//Output: goes to previous column
+	ptrWordOut -= BLOCK_LEN_INT64;
+    }
+}
+
+/**
+ * Performs a duplexing operation over "M[rowInOut][col] [+] M[rowIn][col]" (i.e.,
+ * the wordwise addition of two columns, ignoring carries between words). The
+ * output of this operation, "rand", is then used to make
+ * "M[rowOut][col] = M[rowOut][col] XOR rand" and
+ * "M[rowInOut][col] =  M[rowInOut][col] XOR rotW(rand)", where rotW rotates the
+ * 12-word block by one 64-bit word (word j receives rand word j-1, word 0 receives word 11).
+ *
+ * @param state          The current state of the sponge 
+ * @param rowIn          Row used only as input
+ * @param rowInOut       Row used as input and to receive output after rotation
+ * @param rowOut         Row receiving the output
+ * NOTE(review): LYRA2 picks row* as state[0] % nRows, so rowInOut may be the same row as rowIn or rowOut -- confirm this is intended.
+ */
+void reducedDuplexRow(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) {
+    uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row*
+    uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev
+    uint64_t* ptrWordOut = rowOut; //In Lyra2: pointer to row
+    int i;
+
+    for (i = 0; i < N_COLS; i++) {
+
+	//Absorbing "M[prev] [+] M[row*]" (wordwise 64-bit sums XORed into state[0..11])
+	state[0]  ^= (ptrWordIn[0]  + ptrWordInOut[0]);
+	state[1]  ^= (ptrWordIn[1]  + ptrWordInOut[1]);
+	state[2]  ^= (ptrWordIn[2]  + ptrWordInOut[2]);
+	state[3]  ^= (ptrWordIn[3]  + ptrWordInOut[3]);
+	state[4]  ^= (ptrWordIn[4]  + ptrWordInOut[4]);
+	state[5]  ^= (ptrWordIn[5]  + ptrWordInOut[5]);
+	state[6]  ^= (ptrWordIn[6]  + ptrWordInOut[6]);
+	state[7]  ^= (ptrWordIn[7]  + ptrWordInOut[7]);
+	state[8]  ^= (ptrWordIn[8]  + ptrWordInOut[8]);
+	state[9]  ^= (ptrWordIn[9]  + ptrWordInOut[9]);
+	state[10] ^= (ptrWordIn[10] + ptrWordInOut[10]);
+	state[11] ^= (ptrWordIn[11] + ptrWordInOut[11]);
+
+	//Applies the reduced-round transformation f to the sponge's state
+	reducedBlake2bLyra(state);
+
+	//M[rowOut][col] = M[rowOut][col] XOR rand (rand = state[0..11] after the permutation)
+	ptrWordOut[0] ^= state[0];
+	ptrWordOut[1] ^= state[1];
+	ptrWordOut[2] ^= state[2];
+	ptrWordOut[3] ^= state[3];
+	ptrWordOut[4] ^= state[4];
+	ptrWordOut[5] ^= state[5];
+	ptrWordOut[6] ^= state[6];
+	ptrWordOut[7] ^= state[7];
+	ptrWordOut[8] ^= state[8];
+	ptrWordOut[9] ^= state[9];
+	ptrWordOut[10] ^= state[10];
+	ptrWordOut[11] ^= state[11];
+
+	//M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand): word j is XORed with rand word j-1 (word 0 with word 11)
+	ptrWordInOut[0] ^= state[11];
+	ptrWordInOut[1] ^= state[0];
+	ptrWordInOut[2] ^= state[1];
+	ptrWordInOut[3] ^= state[2];
+	ptrWordInOut[4] ^= state[3];
+	ptrWordInOut[5] ^= state[4];
+	ptrWordInOut[6] ^= state[5];
+	ptrWordInOut[7] ^= state[6];
+	ptrWordInOut[8] ^= state[7];
+	ptrWordInOut[9] ^= state[8];
+	ptrWordInOut[10] ^= state[9];
+	ptrWordInOut[11] ^= state[10];
+
+	//Goes to next block: all three pointers advance in the same direction here
+	ptrWordOut += BLOCK_LEN_INT64;
+	ptrWordInOut += BLOCK_LEN_INT64;
+	ptrWordIn += BLOCK_LEN_INT64;
+    }
+}
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+ * Performs a duplex operation over "M[rowInOut] XOR M[rowIn]", writing the output "rand"
+ * on M[rowOut] and making "M[rowInOut] =  M[rowInOut] XOR rotW(rand)", where rotW is a 64-bit 
+ * rotation to the left.
+ *
+ * @param state          The current state of the sponge 
+ * @param rowIn          Row used only as input
+ * @param rowInOut       Row used as input and to receive output after rotation
+ * @param rowOut         Row receiving the output
+ *
+ */
+/*
+inline void reducedDuplexRowSetupOLD(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) {
+    uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev
+    uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row*
+    uint64_t* ptrWordOut = rowOut; //In Lyra2: pointer to row
+    int i;
+    for (i = 0; i < N_COLS; i++) {
+
+	//Absorbing "M[rowInOut] XOR M[rowIn]"
+	state[0] ^= ptrWordInOut[0] ^ ptrWordIn[0];
+	state[1] ^= ptrWordInOut[1] ^ ptrWordIn[1];
+	state[2] ^= ptrWordInOut[2] ^ ptrWordIn[2];
+	state[3] ^= ptrWordInOut[3] ^ ptrWordIn[3];
+	state[4] ^= ptrWordInOut[4] ^ ptrWordIn[4];
+	state[5] ^= ptrWordInOut[5] ^ ptrWordIn[5];
+	state[6] ^= ptrWordInOut[6] ^ ptrWordIn[6];
+	state[7] ^= ptrWordInOut[7] ^ ptrWordIn[7];
+	state[8] ^= ptrWordInOut[8] ^ ptrWordIn[8];
+	state[9] ^= ptrWordInOut[9] ^ ptrWordIn[9];
+	state[10] ^= ptrWordInOut[10] ^ ptrWordIn[10];
+	state[11] ^= ptrWordInOut[11] ^ ptrWordIn[11];
+
+	//Applies the reduced-round transformation f to the sponge's state
+	reducedBlake2bLyra(state);
+
+	//M[row][col] = rand
+	ptrWordOut[0] = state[0];
+	ptrWordOut[1] = state[1];
+	ptrWordOut[2] = state[2];
+	ptrWordOut[3] = state[3];
+	ptrWordOut[4] = state[4];
+	ptrWordOut[5] = state[5];
+	ptrWordOut[6] = state[6];
+	ptrWordOut[7] = state[7];
+	ptrWordOut[8] = state[8];
+	ptrWordOut[9] = state[9];
+	ptrWordOut[10] = state[10];
+	ptrWordOut[11] = state[11];
+
+
+	//M[row*][col] = M[row*][col] XOR rotW(rand)
+	ptrWordInOut[0] ^= state[10];
+	ptrWordInOut[1] ^= state[11];
+	ptrWordInOut[2] ^= state[0];
+	ptrWordInOut[3] ^= state[1];
+	ptrWordInOut[4] ^= state[2];
+	ptrWordInOut[5] ^= state[3];
+	ptrWordInOut[6] ^= state[4];
+	ptrWordInOut[7] ^= state[5];
+	ptrWordInOut[8] ^= state[6];
+	ptrWordInOut[9] ^= state[7];
+	ptrWordInOut[10] ^= state[8];
+	ptrWordInOut[11] ^= state[9];
+
+	//Goes to next column (i.e., next block in sequence)
+	ptrWordInOut += BLOCK_LEN_INT64;
+	ptrWordIn += BLOCK_LEN_INT64;
+	ptrWordOut += BLOCK_LEN_INT64;
+    }
+}
+*/
+
+/**
+ * Performs a duplex operation over "M[rowInOut] [+] M[rowIn]" (wordwise addition), writing the output "rand"
+ * on M[rowOut] and making "M[rowInOut] =  M[rowInOut] XOR rotW(rand)", where rotW is a 64-bit 
+ * rotation to the left.
+ *
+ * @param state          The current state of the sponge 
+ * @param rowIn          Row used only as input
+ * @param rowInOut       Row used as input and to receive output after rotation
+ * @param rowOut         Row receiving the output
+ *
+ */
+/*
+inline void reducedDuplexRowSetupv5(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) {
+    uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev
+    uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row*
+    uint64_t* ptrWordOut = rowOut; //In Lyra2: pointer to row
+    int i;
+    for (i = 0; i < N_COLS; i++) {
+
+	//Absorbing "M[rowInOut] XOR M[rowIn]"
+	state[0] ^= ptrWordInOut[0] + ptrWordIn[0];
+	state[1] ^= ptrWordInOut[1] + ptrWordIn[1];
+	state[2] ^= ptrWordInOut[2] + ptrWordIn[2];
+	state[3] ^= ptrWordInOut[3] + ptrWordIn[3];
+	state[4] ^= ptrWordInOut[4] + ptrWordIn[4];
+	state[5] ^= ptrWordInOut[5] + ptrWordIn[5];
+	state[6] ^= ptrWordInOut[6] + ptrWordIn[6];
+	state[7] ^= ptrWordInOut[7] + ptrWordIn[7];
+	state[8] ^= ptrWordInOut[8] + ptrWordIn[8];
+	state[9] ^= ptrWordInOut[9] + ptrWordIn[9];
+	state[10] ^= ptrWordInOut[10] + ptrWordIn[10];
+	state[11] ^= ptrWordInOut[11] + ptrWordIn[11];
+
+	//Applies the reduced-round transformation f to the sponge's state
+	reducedBlake2bLyra(state);
+
+
+	//M[row*][col] = M[row*][col] XOR rotW(rand)
+	ptrWordInOut[0] ^= state[10];
+	ptrWordInOut[1] ^= state[11];
+	ptrWordInOut[2] ^= state[0];
+	ptrWordInOut[3] ^= state[1];
+	ptrWordInOut[4] ^= state[2];
+	ptrWordInOut[5] ^= state[3];
+	ptrWordInOut[6] ^= state[4];
+	ptrWordInOut[7] ^= state[5];
+	ptrWordInOut[8] ^= state[6];
+	ptrWordInOut[9] ^= state[7];
+	ptrWordInOut[10] ^= state[8];
+	ptrWordInOut[11] ^= state[9];
+
+
+	//M[row][col] = rand
+	ptrWordOut[0] = state[0] ^ ptrWordIn[0];
+	ptrWordOut[1] = state[1] ^ ptrWordIn[1];
+	ptrWordOut[2] = state[2] ^ ptrWordIn[2];
+	ptrWordOut[3] = state[3] ^ ptrWordIn[3];
+	ptrWordOut[4] = state[4] ^ ptrWordIn[4];
+	ptrWordOut[5] = state[5] ^ ptrWordIn[5];
+	ptrWordOut[6] = state[6] ^ ptrWordIn[6];
+	ptrWordOut[7] = state[7] ^ ptrWordIn[7];
+	ptrWordOut[8] = state[8] ^ ptrWordIn[8];
+	ptrWordOut[9] = state[9] ^ ptrWordIn[9];
+	ptrWordOut[10] = state[10] ^ ptrWordIn[10];
+	ptrWordOut[11] = state[11] ^ ptrWordIn[11];
+
+	//Goes to next column (i.e., next block in sequence)
+	ptrWordInOut += BLOCK_LEN_INT64;
+	ptrWordIn += BLOCK_LEN_INT64;
+	ptrWordOut += BLOCK_LEN_INT64;
+    }
+}
+*/
+
+/**
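+ * Performs a duplex operation over the wordwise sum "M[rowInOut] + M[rowIn]" (the body adds, it does not XOR),
+ * writing "rand XOR M[rowIn]" on M[rowOut] and making "M[rowInOut] =  M[rowInOut] XOR rotW(rand)", where rotW
+ * is implemented below as a rotation of the 12 output words by two positions (rand[10], rand[11], rand[0], ...).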
+ *
+ * @param state          The current state of the sponge 
+ * @param rowIn          Row used only as input
+ * @param rowInOut       Row used as input and to receive output after rotation
+ * @param rowOut         Row receiving the output
+ *
+ */
+/*
+inline void reducedDuplexRowSetupv5c(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) {
+    uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev
+    uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row*
+    uint64_t* ptrWordOut = rowOut;
+    int i;
+
+    for (i = 0; i < N_COLS / 2; i++) {
+	//Absorbing "M[rowInOut] XOR M[rowIn]"
+	state[0] ^= ptrWordInOut[0] + ptrWordIn[0];
+	state[1] ^= ptrWordInOut[1] + ptrWordIn[1];
+	state[2] ^= ptrWordInOut[2] + ptrWordIn[2];
+	state[3] ^= ptrWordInOut[3] + ptrWordIn[3];
+	state[4] ^= ptrWordInOut[4] + ptrWordIn[4];
+	state[5] ^= ptrWordInOut[5] + ptrWordIn[5];
+	state[6] ^= ptrWordInOut[6] + ptrWordIn[6];
+	state[7] ^= ptrWordInOut[7] + ptrWordIn[7];
+	state[8] ^= ptrWordInOut[8] + ptrWordIn[8];
+	state[9] ^= ptrWordInOut[9] + ptrWordIn[9];
+	state[10] ^= ptrWordInOut[10] + ptrWordIn[10];
+	state[11] ^= ptrWordInOut[11] + ptrWordIn[11];
+
+	//Applies the reduced-round transformation f to the sponge's state
+	reducedBlake2bLyra(state);
+
+
+	//M[row*][col] = M[row*][col] XOR rotW(rand)
+	ptrWordInOut[0] ^= state[10];
+	ptrWordInOut[1] ^= state[11];
+	ptrWordInOut[2] ^= state[0];
+	ptrWordInOut[3] ^= state[1];
+	ptrWordInOut[4] ^= state[2];
+	ptrWordInOut[5] ^= state[3];
+	ptrWordInOut[6] ^= state[4];
+	ptrWordInOut[7] ^= state[5];
+	ptrWordInOut[8] ^= state[6];
+	ptrWordInOut[9] ^= state[7];
+	ptrWordInOut[10] ^= state[8];
+	ptrWordInOut[11] ^= state[9];
+
+
+	//M[row][col] = rand
+	ptrWordOut[0] = state[0] ^ ptrWordIn[0];
+	ptrWordOut[1] = state[1] ^ ptrWordIn[1];
+	ptrWordOut[2] = state[2] ^ ptrWordIn[2];
+	ptrWordOut[3] = state[3] ^ ptrWordIn[3];
+	ptrWordOut[4] = state[4] ^ ptrWordIn[4];
+	ptrWordOut[5] = state[5] ^ ptrWordIn[5];
+	ptrWordOut[6] = state[6] ^ ptrWordIn[6];
+	ptrWordOut[7] = state[7] ^ ptrWordIn[7];
+	ptrWordOut[8] = state[8] ^ ptrWordIn[8];
+	ptrWordOut[9] = state[9] ^ ptrWordIn[9];
+	ptrWordOut[10] = state[10] ^ ptrWordIn[10];
+	ptrWordOut[11] = state[11] ^ ptrWordIn[11];
+
+	//Goes to next column (i.e., next block in sequence)
+	ptrWordInOut += BLOCK_LEN_INT64;
+	ptrWordIn += BLOCK_LEN_INT64;
+	ptrWordOut += 2 * BLOCK_LEN_INT64;
+    }
+
+    ptrWordOut =  rowOut + BLOCK_LEN_INT64;
+    for (i = 0; i < N_COLS / 2; i++) {
+	//Absorbing "M[rowInOut] XOR M[rowIn]"
+	state[0] ^= ptrWordInOut[0] + ptrWordIn[0];
+	state[1] ^= ptrWordInOut[1] + ptrWordIn[1];
+	state[2] ^= ptrWordInOut[2] + ptrWordIn[2];
+	state[3] ^= ptrWordInOut[3] + ptrWordIn[3];
+	state[4] ^= ptrWordInOut[4] + ptrWordIn[4];
+	state[5] ^= ptrWordInOut[5] + ptrWordIn[5];
+	state[6] ^= ptrWordInOut[6] + ptrWordIn[6];
+	state[7] ^= ptrWordInOut[7] + ptrWordIn[7];
+	state[8] ^= ptrWordInOut[8] + ptrWordIn[8];
+	state[9] ^= ptrWordInOut[9] + ptrWordIn[9];
+	state[10] ^= ptrWordInOut[10] + ptrWordIn[10];
+	state[11] ^= ptrWordInOut[11] + ptrWordIn[11];
+
+	//Applies the reduced-round transformation f to the sponge's state
+	reducedBlake2bLyra(state);
+
+
+	//M[row*][col] = M[row*][col] XOR rotW(rand)
+	ptrWordInOut[0] ^= state[10];
+	ptrWordInOut[1] ^= state[11];
+	ptrWordInOut[2] ^= state[0];
+	ptrWordInOut[3] ^= state[1];
+	ptrWordInOut[4] ^= state[2];
+	ptrWordInOut[5] ^= state[3];
+	ptrWordInOut[6] ^= state[4];
+	ptrWordInOut[7] ^= state[5];
+	ptrWordInOut[8] ^= state[6];
+	ptrWordInOut[9] ^= state[7];
+	ptrWordInOut[10] ^= state[8];
+	ptrWordInOut[11] ^= state[9];
+
+
+	//M[row][col] = rand
+	ptrWordOut[0] = state[0] ^ ptrWordIn[0];
+	ptrWordOut[1] = state[1] ^ ptrWordIn[1];
+	ptrWordOut[2] = state[2] ^ ptrWordIn[2];
+	ptrWordOut[3] = state[3] ^ ptrWordIn[3];
+	ptrWordOut[4] = state[4] ^ ptrWordIn[4];
+	ptrWordOut[5] = state[5] ^ ptrWordIn[5];
+	ptrWordOut[6] = state[6] ^ ptrWordIn[6];
+	ptrWordOut[7] = state[7] ^ ptrWordIn[7];
+	ptrWordOut[8] = state[8] ^ ptrWordIn[8];
+	ptrWordOut[9] = state[9] ^ ptrWordIn[9];
+	ptrWordOut[10] = state[10] ^ ptrWordIn[10];
+	ptrWordOut[11] = state[11] ^ ptrWordIn[11];
+
+	//Goes to next column (i.e., next block in sequence)
+	ptrWordInOut += BLOCK_LEN_INT64;
+	ptrWordIn += BLOCK_LEN_INT64;
+	ptrWordOut += 2 * BLOCK_LEN_INT64;
+    }
+}
+*/
+
+/**
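+ * Performs a duplex operation over the wordwise sum "M[rowInOut] + M[rowIn]" (the body adds, it does not XOR),
+ * using the output "rand" to make "M[rowOut][col] = M[rowOut][col] XOR rand". The "M[rowInOut] = M[rowInOut] XOR rotW(rand)"
+ * step, where rotW is a 64-bit rotation to the left, is described here but has no code in the commented-out body below.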
+ *
+ * @param state          The current state of the sponge 
+ * @param rowIn          Row used only as input
+ * @param rowInOut       Row used as input and to receive output after rotation
+ * @param rowOut         Row receiving the output
+ *
+ */
+/*
+inline void reducedDuplexRowd(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) {
+    uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row*
+    uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev
+    uint64_t* ptrWordOut = rowOut; //In Lyra2: pointer to row
+    int i;
+    for (i = 0; i < N_COLS; i++) {
+
+	//Absorbing "M[rowInOut] XOR M[rowIn]"
+	state[0] ^= ptrWordInOut[0] + ptrWordIn[0];
+	state[1] ^= ptrWordInOut[1] + ptrWordIn[1];
+	state[2] ^= ptrWordInOut[2] + ptrWordIn[2];
+	state[3] ^= ptrWordInOut[3] + ptrWordIn[3];
+	state[4] ^= ptrWordInOut[4] + ptrWordIn[4];
+	state[5] ^= ptrWordInOut[5] + ptrWordIn[5];
+	state[6] ^= ptrWordInOut[6] + ptrWordIn[6];
+	state[7] ^= ptrWordInOut[7] + ptrWordIn[7];
+	state[8] ^= ptrWordInOut[8] + ptrWordIn[8];
+	state[9] ^= ptrWordInOut[9] + ptrWordIn[9];
+	state[10] ^= ptrWordInOut[10] + ptrWordIn[10];
+	state[11] ^= ptrWordInOut[11] + ptrWordIn[11];
+
+	//Applies the reduced-round transformation f to the sponge's state
+	reducedBlake2bLyra(state);
+
+	//M[rowOut][col] = M[rowOut][col] XOR rand
+	ptrWordOut[0] ^= state[0];
+	ptrWordOut[1] ^= state[1];
+	ptrWordOut[2] ^= state[2];
+	ptrWordOut[3] ^= state[3];
+	ptrWordOut[4] ^= state[4];
+	ptrWordOut[5] ^= state[5];
+	ptrWordOut[6] ^= state[6];
+	ptrWordOut[7] ^= state[7];
+	ptrWordOut[8] ^= state[8];
+	ptrWordOut[9] ^= state[9];
+	ptrWordOut[10] ^= state[10];
+	ptrWordOut[11] ^= state[11];
+
+	//M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand)
+	
+
+	//Goes to next block
+	ptrWordOut += BLOCK_LEN_INT64;
+	ptrWordInOut += BLOCK_LEN_INT64;
+	ptrWordIn += BLOCK_LEN_INT64;
+    }
+}
+*/
+
+/**
+ Prints the first "size" bytes of "array" to stdout as "<name>: ", then each byte in hex ("%2x", space-padded to width 2) followed by '|', then a newline.
+ */
+void printArray(unsigned char *array, unsigned int size, char *name) {
+    unsigned int i; /* same type as size: no signed/unsigned comparison, and sizes above INT_MAX cannot overflow the index */
+    printf("%s: ", name);
+    for (i = 0; i < size; i++) {
+	printf("%2x|", array[i]);
+    }
+    printf("\n");
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/sph/Sponge.h b/sph/Sponge.h
new file mode 100644
index 0000000000..0e5745dadc
--- /dev/null
+++ b/sph/Sponge.h
@@ -0,0 +1,108 @@
+/**
+ * Header file for Blake2b's internal permutation in the form of a sponge. 
+ * This code is based on the original Blake2b's implementation provided by 
+ * Samuel Neves (https://blake2.net/)
+ * 
+ * Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014.
+ * 
+ * This software is hereby placed in the public domain.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef SPONGE_H_
+#define SPONGE_H_
+
+#include <stdint.h>
+
+#ifdef __GNUC__ /* compilers that define __GNUC__: attribute syntax, 32-byte alignment */
+#  define ALIGN __attribute__ ((aligned(32)))
+#elif defined _MSC_VER /* Microsoft compiler: declspec syntax, 32-byte alignment */
+#  define ALIGN __declspec(align(32))
+#else /* any other compiler: ALIGN expands to nothing, so no alignment is requested */
+#  define ALIGN
+#endif
+
+
+/* Blake2b IV array: the eight 64-bit initialization words. It is declared static in this
+ * header, so every translation unit that includes the header gets its own copy. */
+static const uint64_t blake2b_IV[8] = {
+    0x6A09E667F3BCC908ULL, 0xBB67AE8584CAA73BULL,
+    0x3C6EF372FE94F82BULL, 0xA54FF53A5F1D36F1ULL,
+    0x510E527FADE682D1ULL, 0x9B05688C2B3E6C1FULL,
+    0x1F83D9ABFB41BD6BULL, 0x5BE0CD19137E2179ULL
+};
+
+/*Blake2b's rotation*/
+static __inline uint64_t rotr64( const uint64_t w, const unsigned c ){
+    return ( w >> c ) | ( w << ( 64 - c ) );
+}
+
+/*Blake2b's G function: mixes the four 64-bit lvalues a, b, c, d in place (add, xor, rotate right by 32/24/16/63); no message words are added, and r and i are accepted but unused. Arguments are parenthesized so any lvalue expression expands safely; each one is evaluated several times, so it must have no side effects*/
+#define G(r,i,a,b,c,d) \
+  do { \
+    (a) = (a) + (b); \
+    (d) = rotr64((d) ^ (a), 32); \
+    (c) = (c) + (d); \
+    (b) = rotr64((b) ^ (c), 24); \
+    (a) = (a) + (b); \
+    (d) = rotr64((d) ^ (a), 16); \
+    (c) = (c) + (d); \
+    (b) = rotr64((b) ^ (c), 63); \
+  } while(0)
+
+
+/*One Round of the Blake2b's compression function: four G calls on v[] indices (0,4,8,12) (1,5,9,13) (2,6,10,14) (3,7,11,15), then four on (0,5,10,15) (1,6,11,12) (2,7,8,13) (3,4,9,14). It works on an array named v that must be in scope where the macro is expanded and be indexable up to v[15]. r is only passed through to G. The macro expands to several statements ending in ';', so it must not be used as the body of an unbraced if/else/loop*/
+#define ROUND_LYRA(r)  \
+    G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
+    G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
+    G(r,2,v[ 2],v[ 6],v[10],v[14]); \
+    G(r,3,v[ 3],v[ 7],v[11],v[15]); \
+    G(r,4,v[ 0],v[ 5],v[10],v[15]); \
+    G(r,5,v[ 1],v[ 6],v[11],v[12]); \
+    G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
+    G(r,7,v[ 3],v[ 4],v[ 9],v[14]);
+
+
+//---- Housekeeping (this header only declares the functions below; their bodies are defined elsewhere)
+void initState(uint64_t state[/*16*/]); // the /*16*/ hint suggests a 16-word state array -- confirm against the definition
+
+//---- Squeezes (the names suggest reading output out of the sponge state -- confirm against the definitions)
+void squeeze(uint64_t *state, unsigned char *out, unsigned int len); // presumably writes len bytes to out -- TODO confirm
+void reducedSqueezeRow0(uint64_t* state, uint64_t* row);
+
+//---- Absorbs (in is const-qualified, so the input block is read-only for these calls)
+void absorbBlock(uint64_t *state, const uint64_t *in);
+void absorbBlockBlake2Safe(uint64_t *state, const uint64_t *in);
+
+//---- Duplexes (all rows are passed as non-const pointers; which of them are written cannot be told from the prototypes)
+void reducedDuplexRow1(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut);
+void reducedDuplexRowSetup(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
+void reducedDuplexRow(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
+
+//---- Misc
+void printArray(unsigned char *array, unsigned int size, char *name); // prints "name: " followed by size bytes of array in hex
+
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+////TESTS////
+//void reducedDuplexRowc(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
+//void reducedDuplexRowd(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
+//void reducedDuplexRowSetupv4(uint64_t *state, uint64_t *rowIn1, uint64_t *rowIn2, uint64_t *rowOut1, uint64_t *rowOut2);
+//void reducedDuplexRowSetupv5(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
+//void reducedDuplexRowSetupv5c(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
+//void reducedDuplexRowSetupv5d(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
+/////////////
+
+
+#endif /* SPONGE_H_ */
diff --git a/sph/haval.c b/sph/haval.c
new file mode 100644
index 0000000000..f9a8918760
--- /dev/null
+++ b/sph/haval.c
@@ -0,0 +1,983 @@
+/* $Id: haval.c 227 2010-06-16 17:28:38Z tp $ */
+/*
+ * HAVAL implementation.
+ *
+ * The HAVAL reference paper is of questionable clarity with regards to
+ * some details such as endianness of bits within a byte, bytes within
+ * a 32-bit word, or the actual ordering of words within a stream of
+ * words. This implementation has been made compatible with the reference
+ * implementation available on: http://labs.calyptix.com/haval.php
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#include <stddef.h>
+#include <string.h>
+
+#include "sph_haval.h"
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+#if !defined(SPH_SMALL_FOOTPRINT_HAVAL) && SPH_SMALL_FOOTPRINT /* the global small-footprint switch enables the HAVAL one unless that is already defined */
+#define SPH_SMALL_FOOTPRINT_HAVAL 1
+#endif
+
+/*
+ * HAVAL boolean function F1 over seven words; basic definition from the
+ * reference paper (kept inside this comment for reference only):
+#define F1(x6, x5, x4, x3, x2, x1, x0) \
+	(((x1) & (x4)) ^ ((x2) & (x5)) ^ ((x3) & (x6)) ^ ((x0) & (x1)) ^ (x0))
+ * The active macro below factors x1 out of two of the terms; the result is the same.
+ */
+
+#define F1(x6, x5, x4, x3, x2, x1, x0) \
+	((x0) ^ ((x3) & (x6)) ^ ((x2) & (x5)) ^ ((x1) & ((x4) ^ (x0))))
+
+/*
+ * HAVAL boolean function F2 over seven words; basic definition from the
+ * reference paper (kept inside this comment for reference only):
+#define F2(x6, x5, x4, x3, x2, x1, x0) \
+	(((x1) & (x2) & (x3)) ^ ((x2) & (x4) & (x5)) ^ ((x1) & (x2)) \
+	^ ((x1) & (x4)) ^ ((x2) & (x6)) ^ ((x3) & (x5)) \
+	^ ((x4) & (x5)) ^ ((x0) & (x2)) ^ (x0))
+ * The active macro below is a factored form with the same result; every argument, x3 in the last term included, is parenthesized so operator expressions expand correctly.
+ */
+
+#define F2(x6, x5, x4, x3, x2, x1, x0) \
+	(((x2) & (((x1) & ~(x3)) ^ ((x4) & (x5)) ^ (x6) ^ (x0))) \
+	^ ((x4) & ((x1) ^ (x5))) ^ (((x3) & (x5)) ^ (x0)))
+
+/*
+ * HAVAL boolean function F3 over seven words; basic definition from the
+ * reference paper (kept inside this comment for reference only):
+#define F3(x6, x5, x4, x3, x2, x1, x0) \
+	(((x1) & (x2) & (x3)) ^ ((x1) & (x4)) ^ ((x2) & (x5)) \
+	^ ((x3) & (x6)) ^ ((x0) & (x3)) ^ (x0))
+ * The active macro below factors x3 out of three of the terms; the result is the same.
+ */
+
+#define F3(x6, x5, x4, x3, x2, x1, x0) \
+	((x0) ^ ((x2) & (x5)) ^ ((x1) & (x4)) \
+	^ ((x3) & ((x0) ^ (x6) ^ ((x1) & (x2)))))
+
+/*
+ * HAVAL boolean function F4 over seven words; basic definition from the
+ * reference paper (kept inside this comment for reference only):
+#define F4(x6, x5, x4, x3, x2, x1, x0) \
+	(((x1) & (x2) & (x3)) ^ ((x2) & (x4) & (x5)) ^ ((x3) & (x4) & (x6)) \
+	^ ((x1) & (x4)) ^ ((x2) & (x6)) ^ ((x3) & (x4)) ^ ((x3) & (x5)) \
+	^ ((x3) & (x6)) ^ ((x4) & (x5)) ^ ((x4) & (x6)) ^ ((x0) & (x4)) ^ (x0))
+ * The active macro below groups the terms around x3 and x4; the result is the same.
+ */
+
+#define F4(x6, x5, x4, x3, x2, x1, x0) \
+	((x0) ^ ((x2) & (x6)) \
+	^ ((x4) & ((x0) ^ (x6) ^ (x1) ^ ((x5) & ~(x2)))) \
+	^ ((x3) & ((x5) ^ ((x4) | (x6)) ^ ((x1) & (x2)))))
+
+/*
+ * HAVAL boolean function F5 over seven words; basic definition from the
+ * reference paper (kept inside this comment for reference only):
+#define F5(x6, x5, x4, x3, x2, x1, x0) \
+	(((x1) & (x4)) ^ ((x2) & (x5)) ^ ((x3) & (x6)) \
+	^ ((x0) & (x1) & (x2) & (x3)) ^ ((x0) & (x5)) ^ (x0))
+ * The active macro below folds the three x0 terms into one AND with a complement; the result is the same.
+ */
+
+#define F5(x6, x5, x4, x3, x2, x1, x0) \
+	(((x3) & (x6)) ^ ((x2) & (x5)) ^ ((x1) & (x4)) \
+	^ (~((x5) ^ ((x3) & (x2) & (x1))) & (x0)))
+
+/*
+ * The macros below integrate the phi() permutations, depending on the
+ * pass and the total number of passes: FPn_p serves pass p of an n-pass run (STEP pastes the name together from n and p) and calls Fp with its seven arguments reordered.
+ */
+
+#define FP3_1(x6, x5, x4, x3, x2, x1, x0) \
+	F1(x1, x0, x3, x5, x6, x2, x4) /* 3 passes, pass 1 */
+#define FP3_2(x6, x5, x4, x3, x2, x1, x0) \
+	F2(x4, x2, x1, x0, x5, x3, x6) /* 3 passes, pass 2 */
+#define FP3_3(x6, x5, x4, x3, x2, x1, x0) \
+	F3(x6, x1, x2, x3, x4, x5, x0) /* 3 passes, pass 3 */
+
+#define FP4_1(x6, x5, x4, x3, x2, x1, x0) \
+	F1(x2, x6, x1, x4, x5, x3, x0) /* 4 passes, pass 1 */
+#define FP4_2(x6, x5, x4, x3, x2, x1, x0) \
+	F2(x3, x5, x2, x0, x1, x6, x4) /* 4 passes, pass 2 */
+#define FP4_3(x6, x5, x4, x3, x2, x1, x0) \
+	F3(x1, x4, x3, x6, x0, x2, x5) /* 4 passes, pass 3 */
+#define FP4_4(x6, x5, x4, x3, x2, x1, x0) \
+	F4(x6, x4, x0, x5, x2, x1, x3) /* 4 passes, pass 4 */
+
+#define FP5_1(x6, x5, x4, x3, x2, x1, x0) \
+	F1(x3, x4, x1, x0, x5, x2, x6) /* 5 passes, pass 1 */
+#define FP5_2(x6, x5, x4, x3, x2, x1, x0) \
+	F2(x6, x2, x1, x0, x3, x4, x5) /* 5 passes, pass 2 */
+#define FP5_3(x6, x5, x4, x3, x2, x1, x0) \
+	F3(x2, x6, x0, x4, x3, x1, x5) /* 5 passes, pass 3 */
+#define FP5_4(x6, x5, x4, x3, x2, x1, x0) \
+	F4(x1, x5, x3, x2, x0, x4, x6) /* 5 passes, pass 4 */
+#define FP5_5(x6, x5, x4, x3, x2, x1, x0) \
+	F5(x2, x5, x0, x6, x4, x3, x1) /* 5 passes, pass 5 */
+
+/*
+ * One step, for "n" passes, pass number "p" (1 <= p <= n), using
+ * input word "w" and step constant "c". Only x7 is assigned: x7 = SPH_T32(SPH_ROTR32(FPn_p(x6..x0), 7) + SPH_ROTR32(x7, 11) + w + c); x6..x0 are only read. SPH_T32 and SPH_ROTR32 are defined outside this file (presumably 32-bit truncation and right rotation -- confirm in the sph headers).
+ */
+#define STEP(n, p, x7, x6, x5, x4, x3, x2, x1, x0, w, c)  do { \
+		sph_u32 t = FP ## n ## _ ## p(x6, x5, x4, x3, x2, x1, x0); /* t: the permuted boolean function selected by n and p, applied to the seven other words */ \
+		(x7) = SPH_T32(SPH_ROTR32(t, 7) + SPH_ROTR32((x7), 11) \
+			+ (w) + (c)); \
+	} while (0)
+
+/*
+ * PASSy(n, in) computes pass number "y", for a total of "n", using the
+ * one-argument macro "in" to access input words. Current state is assumed
+ * to be held in variables "s0" to "s7".
+ */
+
+#if SPH_SMALL_FOOTPRINT_HAVAL
+
+#define PASS1(n, in)   do { /* rolled pass 1 of n: 4 iterations of 8 steps, words in natural order in(0)..in(31), step constant 0; works on variables s0..s7 of the expansion site */ \
+		unsigned pass_count; /* index of the first word handled by the current iteration: 0, 8, 16, 24 */ \
+		for (pass_count = 0; pass_count < 32; pass_count += 8) { \
+			STEP(n, 1, s7, s6, s5, s4, s3, s2, s1, s0, /* the variable updated by STEP rotates s7, s6, ..., s0 over the 8 steps */ \
+				in(pass_count + 0), SPH_C32(0x00000000)); \
+			STEP(n, 1, s6, s5, s4, s3, s2, s1, s0, s7, \
+				in(pass_count + 1), SPH_C32(0x00000000)); \
+			STEP(n, 1, s5, s4, s3, s2, s1, s0, s7, s6, \
+				in(pass_count + 2), SPH_C32(0x00000000)); \
+			STEP(n, 1, s4, s3, s2, s1, s0, s7, s6, s5, \
+				in(pass_count + 3), SPH_C32(0x00000000)); \
+			STEP(n, 1, s3, s2, s1, s0, s7, s6, s5, s4, \
+				in(pass_count + 4), SPH_C32(0x00000000)); \
+			STEP(n, 1, s2, s1, s0, s7, s6, s5, s4, s3, \
+				in(pass_count + 5), SPH_C32(0x00000000)); \
+			STEP(n, 1, s1, s0, s7, s6, s5, s4, s3, s2, \
+				in(pass_count + 6), SPH_C32(0x00000000)); \
+			STEP(n, 1, s0, s7, s6, s5, s4, s3, s2, s1, \
+				in(pass_count + 7), SPH_C32(0x00000000)); \
+   		} \
+	} while (0)
+
+#define PASSG(p, n, in)   do { /* rolled pass p of n: step k uses word in(MP<p>[k]) and step constant RK<p>[k]; the table names are formed by token pasting with p */ \
+		unsigned pass_count; /* index of the first table entry used by the current iteration: 0, 8, 16, 24 */ \
+		for (pass_count = 0; pass_count < 32; pass_count += 8) { \
+			STEP(n, p, s7, s6, s5, s4, s3, s2, s1, s0, /* the variable updated by STEP rotates s7, s6, ..., s0 over the 8 steps */ \
+				in(MP ## p[pass_count + 0]), \
+				RK ## p[pass_count + 0]); \
+			STEP(n, p, s6, s5, s4, s3, s2, s1, s0, s7, \
+				in(MP ## p[pass_count + 1]), \
+				RK ## p[pass_count + 1]); \
+			STEP(n, p, s5, s4, s3, s2, s1, s0, s7, s6, \
+				in(MP ## p[pass_count + 2]), \
+				RK ## p[pass_count + 2]); \
+			STEP(n, p, s4, s3, s2, s1, s0, s7, s6, s5, \
+				in(MP ## p[pass_count + 3]), \
+				RK ## p[pass_count + 3]); \
+			STEP(n, p, s3, s2, s1, s0, s7, s6, s5, s4, \
+				in(MP ## p[pass_count + 4]), \
+				RK ## p[pass_count + 4]); \
+			STEP(n, p, s2, s1, s0, s7, s6, s5, s4, s3, \
+				in(MP ## p[pass_count + 5]), \
+				RK ## p[pass_count + 5]); \
+			STEP(n, p, s1, s0, s7, s6, s5, s4, s3, s2, \
+				in(MP ## p[pass_count + 6]), \
+				RK ## p[pass_count + 6]); \
+			STEP(n, p, s0, s7, s6, s5, s4, s3, s2, s1, \
+				in(MP ## p[pass_count + 7]), \
+				RK ## p[pass_count + 7]); \
+   		} \
+	} while (0)
+
+#define PASS2(passes, word) PASSG(2, passes, word) /* pass 2 of the rolled variant: tables MP2 / RK2 */
+#define PASS3(passes, word) PASSG(3, passes, word) /* pass 3 of the rolled variant: tables MP3 / RK3 */
+#define PASS4(passes, word) PASSG(4, passes, word) /* pass 4 of the rolled variant: tables MP4 / RK4 */
+#define PASS5(passes, word) PASSG(5, passes, word) /* pass 5 of the rolled variant: tables MP5 / RK5 */
+
+static const unsigned MP2[32] = { /* pass 2 word order: step k reads in(MP2[k]) through PASSG; same sequence as the unrolled PASS2 in the #else branch */
+	 5, 14, 26, 18, 11, 28,  7, 16,
+	 0, 23, 20, 22,  1, 10,  4,  8,
+	30,  3, 21,  9, 17, 24, 29,  6,
+	19, 12, 15, 13,  2, 25, 31, 27
+};
+
+static const unsigned MP3[32] = { /* pass 3 word order: step k reads in(MP3[k]) through PASSG; same sequence as the unrolled PASS3 in the #else branch */
+	19,  9,  4, 20, 28, 17,  8, 22,
+	29, 14, 25, 12, 24, 30, 16, 26,
+	31, 15,  7,  3,  1,  0, 18, 27,
+	13,  6, 21, 10, 23, 11,  5,  2
+};
+
+static const unsigned MP4[32] = { /* pass 4 word order: step k reads in(MP4[k]) through PASSG; same sequence as the unrolled PASS4 in the #else branch */
+	24,  4,  0, 14,  2,  7, 28, 23,
+	26,  6, 30, 20, 18, 25, 19,  3,
+	22, 11, 31, 21,  8, 27, 12,  9,
+	 1, 29,  5, 15, 17, 10, 16, 13
+};
+
+static const unsigned MP5[32] = { /* pass 5 word order: step k reads in(MP5[k]) through PASSG */
+	27,  3, 21, 26, 17, 11, 20, 29,
+	19,  0, 12,  7, 13,  8, 31, 10,
+	 5,  9, 14, 30, 18,  6, 28, 24,
+	 2, 23, 16, 22,  4,  1, 25, 15
+};
+
+static const sph_u32 RK2[32] = { /* pass 2 step constants: step k adds RK2[k] through PASSG; same values as the unrolled PASS2 in the #else branch */
+	SPH_C32(0x452821E6), SPH_C32(0x38D01377),
+	SPH_C32(0xBE5466CF), SPH_C32(0x34E90C6C),
+	SPH_C32(0xC0AC29B7), SPH_C32(0xC97C50DD),
+	SPH_C32(0x3F84D5B5), SPH_C32(0xB5470917),
+	SPH_C32(0x9216D5D9), SPH_C32(0x8979FB1B),
+	SPH_C32(0xD1310BA6), SPH_C32(0x98DFB5AC),
+	SPH_C32(0x2FFD72DB), SPH_C32(0xD01ADFB7),
+	SPH_C32(0xB8E1AFED), SPH_C32(0x6A267E96),
+	SPH_C32(0xBA7C9045), SPH_C32(0xF12C7F99),
+	SPH_C32(0x24A19947), SPH_C32(0xB3916CF7),
+	SPH_C32(0x0801F2E2), SPH_C32(0x858EFC16),
+	SPH_C32(0x636920D8), SPH_C32(0x71574E69),
+	SPH_C32(0xA458FEA3), SPH_C32(0xF4933D7E),
+	SPH_C32(0x0D95748F), SPH_C32(0x728EB658),
+	SPH_C32(0x718BCD58), SPH_C32(0x82154AEE),
+	SPH_C32(0x7B54A41D), SPH_C32(0xC25A59B5)
+};
+
+static const sph_u32 RK3[32] = { /* pass 3 step constants: step k adds RK3[k] through PASSG; same values as the unrolled PASS3 in the #else branch */
+	SPH_C32(0x9C30D539), SPH_C32(0x2AF26013),
+	SPH_C32(0xC5D1B023), SPH_C32(0x286085F0),
+	SPH_C32(0xCA417918), SPH_C32(0xB8DB38EF),
+	SPH_C32(0x8E79DCB0), SPH_C32(0x603A180E),
+	SPH_C32(0x6C9E0E8B), SPH_C32(0xB01E8A3E),
+	SPH_C32(0xD71577C1), SPH_C32(0xBD314B27),
+	SPH_C32(0x78AF2FDA), SPH_C32(0x55605C60),
+	SPH_C32(0xE65525F3), SPH_C32(0xAA55AB94),
+	SPH_C32(0x57489862), SPH_C32(0x63E81440),
+	SPH_C32(0x55CA396A), SPH_C32(0x2AAB10B6),
+	SPH_C32(0xB4CC5C34), SPH_C32(0x1141E8CE),
+	SPH_C32(0xA15486AF), SPH_C32(0x7C72E993),
+	SPH_C32(0xB3EE1411), SPH_C32(0x636FBC2A),
+	SPH_C32(0x2BA9C55D), SPH_C32(0x741831F6),
+	SPH_C32(0xCE5C3E16), SPH_C32(0x9B87931E),
+	SPH_C32(0xAFD6BA33), SPH_C32(0x6C24CF5C)
+};
+
+static const sph_u32 RK4[32] = { /* pass 4 step constants: step k adds RK4[k] through PASSG; same values as the unrolled PASS4 in the #else branch */
+	SPH_C32(0x7A325381), SPH_C32(0x28958677),
+	SPH_C32(0x3B8F4898), SPH_C32(0x6B4BB9AF),
+	SPH_C32(0xC4BFE81B), SPH_C32(0x66282193),
+	SPH_C32(0x61D809CC), SPH_C32(0xFB21A991),
+	SPH_C32(0x487CAC60), SPH_C32(0x5DEC8032),
+	SPH_C32(0xEF845D5D), SPH_C32(0xE98575B1),
+	SPH_C32(0xDC262302), SPH_C32(0xEB651B88),
+	SPH_C32(0x23893E81), SPH_C32(0xD396ACC5),
+	SPH_C32(0x0F6D6FF3), SPH_C32(0x83F44239),
+	SPH_C32(0x2E0B4482), SPH_C32(0xA4842004),
+	SPH_C32(0x69C8F04A), SPH_C32(0x9E1F9B5E),
+	SPH_C32(0x21C66842), SPH_C32(0xF6E96C9A),
+	SPH_C32(0x670C9C61), SPH_C32(0xABD388F0),
+	SPH_C32(0x6A51A0D2), SPH_C32(0xD8542F68),
+	SPH_C32(0x960FA728), SPH_C32(0xAB5133A3),
+	SPH_C32(0x6EEF0B6C), SPH_C32(0x137A3BE4)
+};
+
+static const sph_u32 RK5[32] = { /* pass 5 step constants: step k adds RK5[k] through PASSG */
+	SPH_C32(0xBA3BF050), SPH_C32(0x7EFB2A98),
+	SPH_C32(0xA1F1651D), SPH_C32(0x39AF0176),
+	SPH_C32(0x66CA593E), SPH_C32(0x82430E88),
+	SPH_C32(0x8CEE8619), SPH_C32(0x456F9FB4),
+	SPH_C32(0x7D84A5C3), SPH_C32(0x3B8B5EBE),
+	SPH_C32(0xE06F75D8), SPH_C32(0x85C12073),
+	SPH_C32(0x401A449F), SPH_C32(0x56C16AA6),
+	SPH_C32(0x4ED3AA62), SPH_C32(0x363F7706),
+	SPH_C32(0x1BFEDF72), SPH_C32(0x429B023D),
+	SPH_C32(0x37D0D724), SPH_C32(0xD00A1248),
+	SPH_C32(0xDB0FEAD3), SPH_C32(0x49F1C09B),
+	SPH_C32(0x075372C9), SPH_C32(0x80991B7B),
+	SPH_C32(0x25D479D8), SPH_C32(0xF6E8DEF7),
+	SPH_C32(0xE3FE501A), SPH_C32(0xB6794C3B),
+	SPH_C32(0x976CE0BD), SPH_C32(0x04C006BA),
+	SPH_C32(0xC1A94FB6), SPH_C32(0x409F60C4)
+};
+
+#else
+
+#define PASS1(n, in)   do { /* unrolled pass 1 of n: 32 steps, words in natural order in(0)..in(31), step constant 0; the variable updated by STEP rotates s7, s6, ..., s0 */ \
+   STEP(n, 1, s7, s6, s5, s4, s3, s2, s1, s0, in( 0), SPH_C32(0x00000000)); \
+   STEP(n, 1, s6, s5, s4, s3, s2, s1, s0, s7, in( 1), SPH_C32(0x00000000)); \
+   STEP(n, 1, s5, s4, s3, s2, s1, s0, s7, s6, in( 2), SPH_C32(0x00000000)); \
+   STEP(n, 1, s4, s3, s2, s1, s0, s7, s6, s5, in( 3), SPH_C32(0x00000000)); \
+   STEP(n, 1, s3, s2, s1, s0, s7, s6, s5, s4, in( 4), SPH_C32(0x00000000)); \
+   STEP(n, 1, s2, s1, s0, s7, s6, s5, s4, s3, in( 5), SPH_C32(0x00000000)); \
+   STEP(n, 1, s1, s0, s7, s6, s5, s4, s3, s2, in( 6), SPH_C32(0x00000000)); \
+   STEP(n, 1, s0, s7, s6, s5, s4, s3, s2, s1, in( 7), SPH_C32(0x00000000)); \
+ \
+   STEP(n, 1, s7, s6, s5, s4, s3, s2, s1, s0, in( 8), SPH_C32(0x00000000)); \
+   STEP(n, 1, s6, s5, s4, s3, s2, s1, s0, s7, in( 9), SPH_C32(0x00000000)); \
+   STEP(n, 1, s5, s4, s3, s2, s1, s0, s7, s6, in(10), SPH_C32(0x00000000)); \
+   STEP(n, 1, s4, s3, s2, s1, s0, s7, s6, s5, in(11), SPH_C32(0x00000000)); \
+   STEP(n, 1, s3, s2, s1, s0, s7, s6, s5, s4, in(12), SPH_C32(0x00000000)); \
+   STEP(n, 1, s2, s1, s0, s7, s6, s5, s4, s3, in(13), SPH_C32(0x00000000)); \
+   STEP(n, 1, s1, s0, s7, s6, s5, s4, s3, s2, in(14), SPH_C32(0x00000000)); \
+   STEP(n, 1, s0, s7, s6, s5, s4, s3, s2, s1, in(15), SPH_C32(0x00000000)); \
+ \
+   STEP(n, 1, s7, s6, s5, s4, s3, s2, s1, s0, in(16), SPH_C32(0x00000000)); \
+   STEP(n, 1, s6, s5, s4, s3, s2, s1, s0, s7, in(17), SPH_C32(0x00000000)); \
+   STEP(n, 1, s5, s4, s3, s2, s1, s0, s7, s6, in(18), SPH_C32(0x00000000)); \
+   STEP(n, 1, s4, s3, s2, s1, s0, s7, s6, s5, in(19), SPH_C32(0x00000000)); \
+   STEP(n, 1, s3, s2, s1, s0, s7, s6, s5, s4, in(20), SPH_C32(0x00000000)); \
+   STEP(n, 1, s2, s1, s0, s7, s6, s5, s4, s3, in(21), SPH_C32(0x00000000)); \
+   STEP(n, 1, s1, s0, s7, s6, s5, s4, s3, s2, in(22), SPH_C32(0x00000000)); \
+   STEP(n, 1, s0, s7, s6, s5, s4, s3, s2, s1, in(23), SPH_C32(0x00000000)); \
+ \
+   STEP(n, 1, s7, s6, s5, s4, s3, s2, s1, s0, in(24), SPH_C32(0x00000000)); \
+   STEP(n, 1, s6, s5, s4, s3, s2, s1, s0, s7, in(25), SPH_C32(0x00000000)); \
+   STEP(n, 1, s5, s4, s3, s2, s1, s0, s7, s6, in(26), SPH_C32(0x00000000)); \
+   STEP(n, 1, s4, s3, s2, s1, s0, s7, s6, s5, in(27), SPH_C32(0x00000000)); \
+   STEP(n, 1, s3, s2, s1, s0, s7, s6, s5, s4, in(28), SPH_C32(0x00000000)); \
+   STEP(n, 1, s2, s1, s0, s7, s6, s5, s4, s3, in(29), SPH_C32(0x00000000)); \
+   STEP(n, 1, s1, s0, s7, s6, s5, s4, s3, s2, in(30), SPH_C32(0x00000000)); \
+   STEP(n, 1, s0, s7, s6, s5, s4, s3, s2, s1, in(31), SPH_C32(0x00000000)); \
+	} while (0)
+
+#define PASS2(n, in)   do { /* unrolled pass 2 of n: 32 steps with the word order and step constants written inline (the same values as tables MP2 / RK2 of the small-footprint branch) */ \
+   STEP(n, 2, s7, s6, s5, s4, s3, s2, s1, s0, in( 5), SPH_C32(0x452821E6)); \
+   STEP(n, 2, s6, s5, s4, s3, s2, s1, s0, s7, in(14), SPH_C32(0x38D01377)); \
+   STEP(n, 2, s5, s4, s3, s2, s1, s0, s7, s6, in(26), SPH_C32(0xBE5466CF)); \
+   STEP(n, 2, s4, s3, s2, s1, s0, s7, s6, s5, in(18), SPH_C32(0x34E90C6C)); \
+   STEP(n, 2, s3, s2, s1, s0, s7, s6, s5, s4, in(11), SPH_C32(0xC0AC29B7)); \
+   STEP(n, 2, s2, s1, s0, s7, s6, s5, s4, s3, in(28), SPH_C32(0xC97C50DD)); \
+   STEP(n, 2, s1, s0, s7, s6, s5, s4, s3, s2, in( 7), SPH_C32(0x3F84D5B5)); \
+   STEP(n, 2, s0, s7, s6, s5, s4, s3, s2, s1, in(16), SPH_C32(0xB5470917)); \
+ \
+   STEP(n, 2, s7, s6, s5, s4, s3, s2, s1, s0, in( 0), SPH_C32(0x9216D5D9)); \
+   STEP(n, 2, s6, s5, s4, s3, s2, s1, s0, s7, in(23), SPH_C32(0x8979FB1B)); \
+   STEP(n, 2, s5, s4, s3, s2, s1, s0, s7, s6, in(20), SPH_C32(0xD1310BA6)); \
+   STEP(n, 2, s4, s3, s2, s1, s0, s7, s6, s5, in(22), SPH_C32(0x98DFB5AC)); \
+   STEP(n, 2, s3, s2, s1, s0, s7, s6, s5, s4, in( 1), SPH_C32(0x2FFD72DB)); \
+   STEP(n, 2, s2, s1, s0, s7, s6, s5, s4, s3, in(10), SPH_C32(0xD01ADFB7)); \
+   STEP(n, 2, s1, s0, s7, s6, s5, s4, s3, s2, in( 4), SPH_C32(0xB8E1AFED)); \
+   STEP(n, 2, s0, s7, s6, s5, s4, s3, s2, s1, in( 8), SPH_C32(0x6A267E96)); \
+ \
+   STEP(n, 2, s7, s6, s5, s4, s3, s2, s1, s0, in(30), SPH_C32(0xBA7C9045)); \
+   STEP(n, 2, s6, s5, s4, s3, s2, s1, s0, s7, in( 3), SPH_C32(0xF12C7F99)); \
+   STEP(n, 2, s5, s4, s3, s2, s1, s0, s7, s6, in(21), SPH_C32(0x24A19947)); \
+   STEP(n, 2, s4, s3, s2, s1, s0, s7, s6, s5, in( 9), SPH_C32(0xB3916CF7)); \
+   STEP(n, 2, s3, s2, s1, s0, s7, s6, s5, s4, in(17), SPH_C32(0x0801F2E2)); \
+   STEP(n, 2, s2, s1, s0, s7, s6, s5, s4, s3, in(24), SPH_C32(0x858EFC16)); \
+   STEP(n, 2, s1, s0, s7, s6, s5, s4, s3, s2, in(29), SPH_C32(0x636920D8)); \
+   STEP(n, 2, s0, s7, s6, s5, s4, s3, s2, s1, in( 6), SPH_C32(0x71574E69)); \
+ \
+   STEP(n, 2, s7, s6, s5, s4, s3, s2, s1, s0, in(19), SPH_C32(0xA458FEA3)); \
+   STEP(n, 2, s6, s5, s4, s3, s2, s1, s0, s7, in(12), SPH_C32(0xF4933D7E)); \
+   STEP(n, 2, s5, s4, s3, s2, s1, s0, s7, s6, in(15), SPH_C32(0x0D95748F)); \
+   STEP(n, 2, s4, s3, s2, s1, s0, s7, s6, s5, in(13), SPH_C32(0x728EB658)); \
+   STEP(n, 2, s3, s2, s1, s0, s7, s6, s5, s4, in( 2), SPH_C32(0x718BCD58)); \
+   STEP(n, 2, s2, s1, s0, s7, s6, s5, s4, s3, in(25), SPH_C32(0x82154AEE)); \
+   STEP(n, 2, s1, s0, s7, s6, s5, s4, s3, s2, in(31), SPH_C32(0x7B54A41D)); \
+   STEP(n, 2, s0, s7, s6, s5, s4, s3, s2, s1, in(27), SPH_C32(0xC25A59B5)); \
+	} while (0)
+
+#define PASS3(n, in)   do { /* unrolled pass 3 of n: 32 steps with the word order and step constants written inline (the same values as tables MP3 / RK3 of the small-footprint branch) */ \
+   STEP(n, 3, s7, s6, s5, s4, s3, s2, s1, s0, in(19), SPH_C32(0x9C30D539)); \
+   STEP(n, 3, s6, s5, s4, s3, s2, s1, s0, s7, in( 9), SPH_C32(0x2AF26013)); \
+   STEP(n, 3, s5, s4, s3, s2, s1, s0, s7, s6, in( 4), SPH_C32(0xC5D1B023)); \
+   STEP(n, 3, s4, s3, s2, s1, s0, s7, s6, s5, in(20), SPH_C32(0x286085F0)); \
+   STEP(n, 3, s3, s2, s1, s0, s7, s6, s5, s4, in(28), SPH_C32(0xCA417918)); \
+   STEP(n, 3, s2, s1, s0, s7, s6, s5, s4, s3, in(17), SPH_C32(0xB8DB38EF)); \
+   STEP(n, 3, s1, s0, s7, s6, s5, s4, s3, s2, in( 8), SPH_C32(0x8E79DCB0)); \
+   STEP(n, 3, s0, s7, s6, s5, s4, s3, s2, s1, in(22), SPH_C32(0x603A180E)); \
+ \
+   STEP(n, 3, s7, s6, s5, s4, s3, s2, s1, s0, in(29), SPH_C32(0x6C9E0E8B)); \
+   STEP(n, 3, s6, s5, s4, s3, s2, s1, s0, s7, in(14), SPH_C32(0xB01E8A3E)); \
+   STEP(n, 3, s5, s4, s3, s2, s1, s0, s7, s6, in(25), SPH_C32(0xD71577C1)); \
+   STEP(n, 3, s4, s3, s2, s1, s0, s7, s6, s5, in(12), SPH_C32(0xBD314B27)); \
+   STEP(n, 3, s3, s2, s1, s0, s7, s6, s5, s4, in(24), SPH_C32(0x78AF2FDA)); \
+   STEP(n, 3, s2, s1, s0, s7, s6, s5, s4, s3, in(30), SPH_C32(0x55605C60)); \
+   STEP(n, 3, s1, s0, s7, s6, s5, s4, s3, s2, in(16), SPH_C32(0xE65525F3)); \
+   STEP(n, 3, s0, s7, s6, s5, s4, s3, s2, s1, in(26), SPH_C32(0xAA55AB94)); \
+ \
+   STEP(n, 3, s7, s6, s5, s4, s3, s2, s1, s0, in(31), SPH_C32(0x57489862)); \
+   STEP(n, 3, s6, s5, s4, s3, s2, s1, s0, s7, in(15), SPH_C32(0x63E81440)); \
+   STEP(n, 3, s5, s4, s3, s2, s1, s0, s7, s6, in( 7), SPH_C32(0x55CA396A)); \
+   STEP(n, 3, s4, s3, s2, s1, s0, s7, s6, s5, in( 3), SPH_C32(0x2AAB10B6)); \
+   STEP(n, 3, s3, s2, s1, s0, s7, s6, s5, s4, in( 1), SPH_C32(0xB4CC5C34)); \
+   STEP(n, 3, s2, s1, s0, s7, s6, s5, s4, s3, in( 0), SPH_C32(0x1141E8CE)); \
+   STEP(n, 3, s1, s0, s7, s6, s5, s4, s3, s2, in(18), SPH_C32(0xA15486AF)); \
+   STEP(n, 3, s0, s7, s6, s5, s4, s3, s2, s1, in(27), SPH_C32(0x7C72E993)); \
+ \
+   STEP(n, 3, s7, s6, s5, s4, s3, s2, s1, s0, in(13), SPH_C32(0xB3EE1411)); \
+   STEP(n, 3, s6, s5, s4, s3, s2, s1, s0, s7, in( 6), SPH_C32(0x636FBC2A)); \
+   STEP(n, 3, s5, s4, s3, s2, s1, s0, s7, s6, in(21), SPH_C32(0x2BA9C55D)); \
+   STEP(n, 3, s4, s3, s2, s1, s0, s7, s6, s5, in(10), SPH_C32(0x741831F6)); \
+   STEP(n, 3, s3, s2, s1, s0, s7, s6, s5, s4, in(23), SPH_C32(0xCE5C3E16)); \
+   STEP(n, 3, s2, s1, s0, s7, s6, s5, s4, s3, in(11), SPH_C32(0x9B87931E)); \
+   STEP(n, 3, s1, s0, s7, s6, s5, s4, s3, s2, in( 5), SPH_C32(0xAFD6BA33)); \
+   STEP(n, 3, s0, s7, s6, s5, s4, s3, s2, s1, in( 2), SPH_C32(0x6C24CF5C)); \
+	} while (0)
+
+#define PASS4(n, in)   do { /* unrolled pass 4 of n: 32 steps with the word order and step constants written inline (the same values as tables MP4 / RK4 of the small-footprint branch) */ \
+   STEP(n, 4, s7, s6, s5, s4, s3, s2, s1, s0, in(24), SPH_C32(0x7A325381)); \
+   STEP(n, 4, s6, s5, s4, s3, s2, s1, s0, s7, in( 4), SPH_C32(0x28958677)); \
+   STEP(n, 4, s5, s4, s3, s2, s1, s0, s7, s6, in( 0), SPH_C32(0x3B8F4898)); \
+   STEP(n, 4, s4, s3, s2, s1, s0, s7, s6, s5, in(14), SPH_C32(0x6B4BB9AF)); \
+   STEP(n, 4, s3, s2, s1, s0, s7, s6, s5, s4, in( 2), SPH_C32(0xC4BFE81B)); \
+   STEP(n, 4, s2, s1, s0, s7, s6, s5, s4, s3, in( 7), SPH_C32(0x66282193)); \
+   STEP(n, 4, s1, s0, s7, s6, s5, s4, s3, s2, in(28), SPH_C32(0x61D809CC)); \
+   STEP(n, 4, s0, s7, s6, s5, s4, s3, s2, s1, in(23), SPH_C32(0xFB21A991)); \
+ \
+   STEP(n, 4, s7, s6, s5, s4, s3, s2, s1, s0, in(26), SPH_C32(0x487CAC60)); \
+   STEP(n, 4, s6, s5, s4, s3, s2, s1, s0, s7, in( 6), SPH_C32(0x5DEC8032)); \
+   STEP(n, 4, s5, s4, s3, s2, s1, s0, s7, s6, in(30), SPH_C32(0xEF845D5D)); \
+   STEP(n, 4, s4, s3, s2, s1, s0, s7, s6, s5, in(20), SPH_C32(0xE98575B1)); \
+   STEP(n, 4, s3, s2, s1, s0, s7, s6, s5, s4, in(18), SPH_C32(0xDC262302)); \
+   STEP(n, 4, s2, s1, s0, s7, s6, s5, s4, s3, in(25), SPH_C32(0xEB651B88)); \
+   STEP(n, 4, s1, s0, s7, s6, s5, s4, s3, s2, in(19), SPH_C32(0x23893E81)); \
+   STEP(n, 4, s0, s7, s6, s5, s4, s3, s2, s1, in( 3), SPH_C32(0xD396ACC5)); \
+ \
+   STEP(n, 4, s7, s6, s5, s4, s3, s2, s1, s0, in(22), SPH_C32(0x0F6D6FF3)); \
+   STEP(n, 4, s6, s5, s4, s3, s2, s1, s0, s7, in(11), SPH_C32(0x83F44239)); \
+   STEP(n, 4, s5, s4, s3, s2, s1, s0, s7, s6, in(31), SPH_C32(0x2E0B4482)); \
+   STEP(n, 4, s4, s3, s2, s1, s0, s7, s6, s5, in(21), SPH_C32(0xA4842004)); \
+   STEP(n, 4, s3, s2, s1, s0, s7, s6, s5, s4, in( 8), SPH_C32(0x69C8F04A)); \
+   STEP(n, 4, s2, s1, s0, s7, s6, s5, s4, s3, in(27), SPH_C32(0x9E1F9B5E)); \
+   STEP(n, 4, s1, s0, s7, s6, s5, s4, s3, s2, in(12), SPH_C32(0x21C66842)); \
+   STEP(n, 4, s0, s7, s6, s5, s4, s3, s2, s1, in( 9), SPH_C32(0xF6E96C9A)); \
+ \
+   STEP(n, 4, s7, s6, s5, s4, s3, s2, s1, s0, in( 1), SPH_C32(0x670C9C61)); \
+   STEP(n, 4, s6, s5, s4, s3, s2, s1, s0, s7, in(29), SPH_C32(0xABD388F0)); \
+   STEP(n, 4, s5, s4, s3, s2, s1, s0, s7, s6, in( 5), SPH_C32(0x6A51A0D2)); \
+   STEP(n, 4, s4, s3, s2, s1, s0, s7, s6, s5, in(15), SPH_C32(0xD8542F68)); \
+   STEP(n, 4, s3, s2, s1, s0, s7, s6, s5, s4, in(17), SPH_C32(0x960FA728)); \
+   STEP(n, 4, s2, s1, s0, s7, s6, s5, s4, s3, in(10), SPH_C32(0xAB5133A3)); \
+   STEP(n, 4, s1, s0, s7, s6, s5, s4, s3, s2, in(16), SPH_C32(0x6EEF0B6C)); \
+   STEP(n, 4, s0, s7, s6, s5, s4, s3, s2, s1, in(13), SPH_C32(0x137A3BE4)); \
+	} while (0)
+
+/*
+ * PASS5(n, in) expands to the 32 STEP invocations tagged as pass 5 of an
+ * n-pass computation. As in the other passes, each step consumes one
+ * input word through "in" (the indices are a permutation of 0..31) and
+ * one 32-bit constant, and the state variables "s0" to "s7" are passed
+ * rotated by one position per step.
+ */
+#define PASS5(n, in)   do { \
+   STEP(n, 5, s7, s6, s5, s4, s3, s2, s1, s0, in(27), SPH_C32(0xBA3BF050)); \
+   STEP(n, 5, s6, s5, s4, s3, s2, s1, s0, s7, in( 3), SPH_C32(0x7EFB2A98)); \
+   STEP(n, 5, s5, s4, s3, s2, s1, s0, s7, s6, in(21), SPH_C32(0xA1F1651D)); \
+   STEP(n, 5, s4, s3, s2, s1, s0, s7, s6, s5, in(26), SPH_C32(0x39AF0176)); \
+   STEP(n, 5, s3, s2, s1, s0, s7, s6, s5, s4, in(17), SPH_C32(0x66CA593E)); \
+   STEP(n, 5, s2, s1, s0, s7, s6, s5, s4, s3, in(11), SPH_C32(0x82430E88)); \
+   STEP(n, 5, s1, s0, s7, s6, s5, s4, s3, s2, in(20), SPH_C32(0x8CEE8619)); \
+   STEP(n, 5, s0, s7, s6, s5, s4, s3, s2, s1, in(29), SPH_C32(0x456F9FB4)); \
+ \
+   STEP(n, 5, s7, s6, s5, s4, s3, s2, s1, s0, in(19), SPH_C32(0x7D84A5C3)); \
+   STEP(n, 5, s6, s5, s4, s3, s2, s1, s0, s7, in( 0), SPH_C32(0x3B8B5EBE)); \
+   STEP(n, 5, s5, s4, s3, s2, s1, s0, s7, s6, in(12), SPH_C32(0xE06F75D8)); \
+   STEP(n, 5, s4, s3, s2, s1, s0, s7, s6, s5, in( 7), SPH_C32(0x85C12073)); \
+   STEP(n, 5, s3, s2, s1, s0, s7, s6, s5, s4, in(13), SPH_C32(0x401A449F)); \
+   STEP(n, 5, s2, s1, s0, s7, s6, s5, s4, s3, in( 8), SPH_C32(0x56C16AA6)); \
+   STEP(n, 5, s1, s0, s7, s6, s5, s4, s3, s2, in(31), SPH_C32(0x4ED3AA62)); \
+   STEP(n, 5, s0, s7, s6, s5, s4, s3, s2, s1, in(10), SPH_C32(0x363F7706)); \
+ \
+   STEP(n, 5, s7, s6, s5, s4, s3, s2, s1, s0, in( 5), SPH_C32(0x1BFEDF72)); \
+   STEP(n, 5, s6, s5, s4, s3, s2, s1, s0, s7, in( 9), SPH_C32(0x429B023D)); \
+   STEP(n, 5, s5, s4, s3, s2, s1, s0, s7, s6, in(14), SPH_C32(0x37D0D724)); \
+   STEP(n, 5, s4, s3, s2, s1, s0, s7, s6, s5, in(30), SPH_C32(0xD00A1248)); \
+   STEP(n, 5, s3, s2, s1, s0, s7, s6, s5, s4, in(18), SPH_C32(0xDB0FEAD3)); \
+   STEP(n, 5, s2, s1, s0, s7, s6, s5, s4, s3, in( 6), SPH_C32(0x49F1C09B)); \
+   STEP(n, 5, s1, s0, s7, s6, s5, s4, s3, s2, in(28), SPH_C32(0x075372C9)); \
+   STEP(n, 5, s0, s7, s6, s5, s4, s3, s2, s1, in(24), SPH_C32(0x80991B7B)); \
+ \
+   STEP(n, 5, s7, s6, s5, s4, s3, s2, s1, s0, in( 2), SPH_C32(0x25D479D8)); \
+   STEP(n, 5, s6, s5, s4, s3, s2, s1, s0, s7, in(23), SPH_C32(0xF6E8DEF7)); \
+   STEP(n, 5, s5, s4, s3, s2, s1, s0, s7, s6, in(16), SPH_C32(0xE3FE501A)); \
+   STEP(n, 5, s4, s3, s2, s1, s0, s7, s6, s5, in(22), SPH_C32(0xB6794C3B)); \
+   STEP(n, 5, s3, s2, s1, s0, s7, s6, s5, s4, in( 4), SPH_C32(0x976CE0BD)); \
+   STEP(n, 5, s2, s1, s0, s7, s6, s5, s4, s3, in( 1), SPH_C32(0x04C006BA)); \
+   STEP(n, 5, s1, s0, s7, s6, s5, s4, s3, s2, in(25), SPH_C32(0xC1A94FB6)); \
+   STEP(n, 5, s0, s7, s6, s5, s4, s3, s2, s1, in(15), SPH_C32(0x409F60C4)); \
+	} while (0)
+
+#endif
+
+/*
+ * SAVE_STATE declares the snapshot variables "u0" to "u7" and sets each
+ * of them to the current value of the matching state variable "s0" to
+ * "s7". It expands to a declaration, so it must appear where a
+ * declaration is allowed (the COREn macros place it first in a block).
+ */
+#define SAVE_STATE \
+	sph_u32 u0 = s0, u1 = s1, u2 = s2, u3 = s3, \
+		u4 = s4, u5 = s5, u6 = s6, u7 = s7
+
+/*
+ * UPDATE_STATE adds the snapshot taken by SAVE_STATE ("u0" to "u7") back
+ * into the state variables "s0" to "s7"; each sum is passed through
+ * SPH_T32.
+ */
+#define UPDATE_STATE   do { \
+		s0 = SPH_T32(s0 + u0); \
+		s1 = SPH_T32(s1 + u1); \
+		s2 = SPH_T32(s2 + u2); \
+		s3 = SPH_T32(s3 + u3); \
+		s4 = SPH_T32(s4 + u4); \
+		s5 = SPH_T32(s5 + u5); \
+		s6 = SPH_T32(s6 + u6); \
+		s7 = SPH_T32(s7 + u7); \
+	} while (0)
+
+/*
+ * COREn(in) performs the core HAVAL computation for "n" passes, using
+ * the one-argument macro "in" to access the input words. Running state
+ * is held in variable "s0" to "s7".
+ *
+ * Every COREn follows the same shape: snapshot the state (SAVE_STATE),
+ * run passes 1 to n in order, then add the snapshot back into the
+ * state (UPDATE_STATE). The whole sequence sits in its own block, so
+ * the snapshot variables are local to one invocation.
+ */
+
+/* Three-pass variant: PASS1, PASS2, PASS3. */
+#define CORE3(in)  do { \
+		SAVE_STATE; \
+		PASS1(3, in); \
+		PASS2(3, in); \
+		PASS3(3, in); \
+		UPDATE_STATE; \
+	} while (0)
+
+/*
+ * Four-pass variant: snapshot the state, run PASS1 to PASS4 with pass
+ * count 4, then add the snapshot back into the state.
+ */
+#define CORE4(in)  do { \
+		SAVE_STATE; \
+		PASS1(4, in); \
+		PASS2(4, in); \
+		PASS3(4, in); \
+		PASS4(4, in); \
+		UPDATE_STATE; \
+	} while (0)
+
+/*
+ * Five-pass variant: snapshot the state, run PASS1 to PASS5 with pass
+ * count 5, then add the snapshot back into the state.
+ */
+#define CORE5(in)  do { \
+		SAVE_STATE; \
+		PASS1(5, in); \
+		PASS2(5, in); \
+		PASS3(5, in); \
+		PASS4(5, in); \
+		PASS5(5, in); \
+		UPDATE_STATE; \
+	} while (0)
+
+/*
+ * DSTATE declares the state variables "s0" to "s7". It expands to a
+ * declaration without the trailing semicolon, so it is written as
+ * "DSTATE;" among a function's local declarations.
+ */
+#define DSTATE   sph_u32 s0, s1, s2, s3, s4, s5, s6, s7
+
+/*
+ * RSTATE fills the state variables from the context "sc". It expects a
+ * context pointer named "sc" and the DSTATE variables to be in scope.
+ */
+#define RSTATE   do { \
+		s0 = sc->s0; \
+		s1 = sc->s1; \
+		s2 = sc->s2; \
+		s3 = sc->s3; \
+		s4 = sc->s4; \
+		s5 = sc->s5; \
+		s6 = sc->s6; \
+		s7 = sc->s7; \
+	} while (0)
+
+/*
+ * WSTATE updates the context "sc" from the state variables. It is the
+ * inverse of RSTATE and has the same scope requirements ("sc" and the
+ * DSTATE variables).
+ */
+#define WSTATE   do { \
+		sc->s0 = s0; \
+		sc->s1 = s1; \
+		sc->s2 = s2; \
+		sc->s3 = s3; \
+		sc->s4 = s4; \
+		sc->s5 = s5; \
+		sc->s6 = s6; \
+		sc->s7 = s7; \
+	} while (0)
+
+/*
+ * Reset a context to its starting condition.
+ *
+ * sc      context to reset
+ * olen    output length in 32-bit words (between 4 and 8, inclusive)
+ * passes  number of passes (3, 4 or 5)
+ *
+ * The byte counter is cleared, the two parameters are recorded in the
+ * context, and the eight state words receive their fixed initial
+ * values.
+ */
+static void
+haval_init(sph_haval_context *sc, unsigned olen, unsigned passes)
+{
+	/* No input has been consumed yet. */
+#if SPH_64
+	sc->count = 0;
+#else
+	sc->count_low = 0;
+	sc->count_high = 0;
+#endif
+
+	/* Parameters consulted later by the close/output routines. */
+	sc->passes = passes;
+	sc->olen = olen;
+
+	/* Fixed initial state. */
+	sc->s7 = SPH_C32(0xEC4E6C89);
+	sc->s6 = SPH_C32(0x082EFA98);
+	sc->s5 = SPH_C32(0x299F31D0);
+	sc->s4 = SPH_C32(0xA4093822);
+	sc->s3 = SPH_C32(0x03707344);
+	sc->s2 = SPH_C32(0x13198A2E);
+	sc->s1 = SPH_C32(0x85A308D3);
+	sc->s0 = SPH_C32(0x243F6A88);
+}
+
+/*
+ * IN_PREPARE(data) contains declarations and code to prepare for
+ * reading input words pointed to by "data".
+ * INW(i) reads the word number "i" (from 0 to 31).
+ *
+ * Two implementations are provided. When SPH_LITTLE_FAST is set,
+ * IN_PREPARE only records a byte pointer and each INW(i) decodes the
+ * word directly from the input. Otherwise IN_PREPARE decodes all 32
+ * words up front into the local array "X_var" and INW(i) is a plain
+ * array access. In both cases IN_PREPARE introduces declarations and
+ * must be followed by a semicolon supplied by the user of the macro.
+ */
+#if SPH_LITTLE_FAST
+#define IN_PREPARE(indata)   const unsigned char *const load_ptr = \
+                             (const unsigned char *)(indata)
+#define INW(i)   sph_dec32le_aligned(load_ptr + 4 * (i))
+#else
+#define IN_PREPARE(indata) \
+	sph_u32 X_var[32]; \
+	int load_index; \
+ \
+	for (load_index = 0; load_index < 32; load_index ++) \
+		X_var[load_index] = sph_dec32le_aligned( \
+			(const unsigned char *)(indata) + 4 * load_index)
+#define INW(i)   X_var[i]
+#endif
+
+/*
+ * Word assembly used for 128-bit output tailoring. The result takes its
+ * lowest byte from a0, the next byte from a1, the third byte from a2
+ * and the top byte from a3. When n is positive the assembled word is
+ * rotated to the left by n bits; otherwise it is returned unrotated.
+ */
+static SPH_INLINE sph_u32
+mix128(sph_u32 a0, sph_u32 a1, sph_u32 a2, sph_u32 a3, int n)
+{
+	sph_u32 word = (a3 & SPH_C32(0xFF000000))
+		| (a2 & SPH_C32(0x00FF0000))
+		| (a1 & SPH_C32(0x0000FF00))
+		| (a0 & SPH_C32(0x000000FF));
+
+	/* No rotation requested: hand back the assembled word as is. */
+	if (n <= 0)
+		return word;
+	return SPH_ROTL32(word, n);
+}
+
+/*
+ * Output word 0 mixing for 160-bit output: selected bit fields of x5,
+ * x6 and x7 are merged and the result is rotated left by 13 bits.
+ */
+static SPH_INLINE sph_u32
+mix160_0(sph_u32 x5, sph_u32 x6, sph_u32 x7)
+{
+	const sph_u32 f5 = x5 & SPH_C32(0x01F80000);
+	const sph_u32 f6 = x6 & SPH_C32(0xFE000000);
+	const sph_u32 f7 = x7 & SPH_C32(0x0000003F);
+	const sph_u32 merged = f5 | f6 | f7;
+
+	return SPH_ROTL32(merged, 13);
+}
+
+/*
+ * Output word 1 mixing for 160-bit output: selected bit fields of x5,
+ * x6 and x7 are merged and the result is rotated left by 7 bits.
+ */
+static SPH_INLINE sph_u32
+mix160_1(sph_u32 x5, sph_u32 x6, sph_u32 x7)
+{
+	const sph_u32 f5 = x5 & SPH_C32(0xFE000000);
+	const sph_u32 f6 = x6 & SPH_C32(0x0000003F);
+	const sph_u32 f7 = x7 & SPH_C32(0x00000FC0);
+	const sph_u32 merged = f5 | f6 | f7;
+
+	return SPH_ROTL32(merged, 7);
+}
+
+/*
+ * Output word 2 mixing for 160-bit output: selected bit fields of x5,
+ * x6 and x7 are merged; no shift or rotation is applied.
+ */
+static SPH_INLINE sph_u32
+mix160_2(sph_u32 x5, sph_u32 x6, sph_u32 x7)
+{
+	const sph_u32 f5 = x5 & SPH_C32(0x0000003F);
+	const sph_u32 f6 = x6 & SPH_C32(0x00000FC0);
+	const sph_u32 f7 = x7 & SPH_C32(0x0007F000);
+
+	return f5 | f6 | f7;
+}
+
+/*
+ * Output word 3 mixing for 160-bit output: selected bit fields of x5,
+ * x6 and x7 are merged and the result is shifted right by 6 bits.
+ */
+static SPH_INLINE sph_u32
+mix160_3(sph_u32 x5, sph_u32 x6, sph_u32 x7)
+{
+	const sph_u32 f5 = x5 & SPH_C32(0x00000FC0);
+	const sph_u32 f6 = x6 & SPH_C32(0x0007F000);
+	const sph_u32 f7 = x7 & SPH_C32(0x01F80000);
+
+	return (f5 | f6 | f7) >> 6;
+}
+
+/*
+ * Output word 4 mixing for 160-bit output: selected bit fields of x5,
+ * x6 and x7 are merged and the result is shifted right by 12 bits.
+ */
+static SPH_INLINE sph_u32
+mix160_4(sph_u32 x5, sph_u32 x6, sph_u32 x7)
+{
+	const sph_u32 f5 = x5 & SPH_C32(0x0007F000);
+	const sph_u32 f6 = x6 & SPH_C32(0x01F80000);
+	const sph_u32 f7 = x7 & SPH_C32(0xFE000000);
+
+	return (f5 | f6 | f7) >> 12;
+}
+
+/*
+ * Output word 0 mixing for 192-bit output: the top six bits of x6 and
+ * the low five bits of x7 are merged, then rotated left by 6 bits.
+ */
+static SPH_INLINE sph_u32
+mix192_0(sph_u32 x6, sph_u32 x7)
+{
+	const sph_u32 f6 = x6 & SPH_C32(0xFC000000);
+	const sph_u32 f7 = x7 & SPH_C32(0x0000001F);
+	const sph_u32 merged = f6 | f7;
+
+	return SPH_ROTL32(merged, 6);
+}
+
+/*
+ * Output word 1 mixing for 192-bit output: bits 0-4 of x6 merged with
+ * bits 5-9 of x7; no shift is applied.
+ */
+static SPH_INLINE sph_u32
+mix192_1(sph_u32 x6, sph_u32 x7)
+{
+	const sph_u32 f6 = x6 & SPH_C32(0x0000001F);
+	const sph_u32 f7 = x7 & SPH_C32(0x000003E0);
+
+	return f6 | f7;
+}
+
+/*
+ * Output word 2 mixing for 192-bit output: bits 5-9 of x6 merged with
+ * bits 10-15 of x7, shifted right by 5 bits.
+ */
+static SPH_INLINE sph_u32
+mix192_2(sph_u32 x6, sph_u32 x7)
+{
+	const sph_u32 f6 = x6 & SPH_C32(0x000003E0);
+	const sph_u32 f7 = x7 & SPH_C32(0x0000FC00);
+
+	return (f6 | f7) >> 5;
+}
+
+/*
+ * Output word 3 mixing for 192-bit output: bits 10-15 of x6 merged with
+ * bits 16-20 of x7, shifted right by 10 bits.
+ */
+static SPH_INLINE sph_u32
+mix192_3(sph_u32 x6, sph_u32 x7)
+{
+	const sph_u32 f6 = x6 & SPH_C32(0x0000FC00);
+	const sph_u32 f7 = x7 & SPH_C32(0x001F0000);
+
+	return (f6 | f7) >> 10;
+}
+
+/*
+ * Output word 4 mixing for 192-bit output: bits 16-20 of x6 merged with
+ * bits 21-25 of x7, shifted right by 16 bits.
+ */
+static SPH_INLINE sph_u32
+mix192_4(sph_u32 x6, sph_u32 x7)
+{
+	const sph_u32 f6 = x6 & SPH_C32(0x001F0000);
+	const sph_u32 f7 = x7 & SPH_C32(0x03E00000);
+
+	return (f6 | f7) >> 16;
+}
+
+/*
+ * Output word 5 mixing for 192-bit output: bits 21-25 of x6 merged with
+ * bits 26-31 of x7, shifted right by 21 bits.
+ */
+static SPH_INLINE sph_u32
+mix192_5(sph_u32 x6, sph_u32 x7)
+{
+	const sph_u32 f6 = x6 & SPH_C32(0x03E00000);
+	const sph_u32 f7 = x7 & SPH_C32(0xFC000000);
+
+	return (f6 | f7) >> 21;
+}
+
+/*
+ * Emit the HAVAL digest. The state held in the context is folded down
+ * to sc->olen 32-bit words (4 to 8) and each word is written to "dst"
+ * in little-endian order, so 4 * sc->olen bytes are produced. Nothing
+ * is written when sc->olen is outside the 4..8 range.
+ */
+static void
+haval_out(sph_haval_context *sc, void *dst)
+{
+	unsigned char *out = (unsigned char *)dst;
+	DSTATE;
+
+	RSTATE;
+	switch (sc->olen) {
+	case 8:
+		/* 256 bits: the state words are written unchanged. */
+		sph_enc32le(out,      s0);
+		sph_enc32le(out + 4,  s1);
+		sph_enc32le(out + 8,  s2);
+		sph_enc32le(out + 12, s3);
+		sph_enc32le(out + 16, s4);
+		sph_enc32le(out + 20, s5);
+		sph_enc32le(out + 24, s6);
+		sph_enc32le(out + 28, s7);
+		break;
+	case 7:
+		/* 224 bits: s7 is split into small fields added to s0..s6. */
+		sph_enc32le(out,      SPH_T32(s0 + ((s7 >> 27) & 0x1F)));
+		sph_enc32le(out + 4,  SPH_T32(s1 + ((s7 >> 22) & 0x1F)));
+		sph_enc32le(out + 8,  SPH_T32(s2 + ((s7 >> 18) & 0x0F)));
+		sph_enc32le(out + 12, SPH_T32(s3 + ((s7 >> 13) & 0x1F)));
+		sph_enc32le(out + 16, SPH_T32(s4 + ((s7 >>  9) & 0x0F)));
+		sph_enc32le(out + 20, SPH_T32(s5 + ((s7 >>  4) & 0x1F)));
+		sph_enc32le(out + 24, SPH_T32(s6 + ((s7      ) & 0x0F)));
+		break;
+	case 6:
+		/* 192 bits: s6 and s7 are folded into s0..s5. */
+		sph_enc32le(out,      SPH_T32(s0 + mix192_0(s6, s7)));
+		sph_enc32le(out + 4,  SPH_T32(s1 + mix192_1(s6, s7)));
+		sph_enc32le(out + 8,  SPH_T32(s2 + mix192_2(s6, s7)));
+		sph_enc32le(out + 12, SPH_T32(s3 + mix192_3(s6, s7)));
+		sph_enc32le(out + 16, SPH_T32(s4 + mix192_4(s6, s7)));
+		sph_enc32le(out + 20, SPH_T32(s5 + mix192_5(s6, s7)));
+		break;
+	case 5:
+		/* 160 bits: s5, s6 and s7 are folded into s0..s4. */
+		sph_enc32le(out,      SPH_T32(s0 + mix160_0(s5, s6, s7)));
+		sph_enc32le(out + 4,  SPH_T32(s1 + mix160_1(s5, s6, s7)));
+		sph_enc32le(out + 8,  SPH_T32(s2 + mix160_2(s5, s6, s7)));
+		sph_enc32le(out + 12, SPH_T32(s3 + mix160_3(s5, s6, s7)));
+		sph_enc32le(out + 16, SPH_T32(s4 + mix160_4(s5, s6, s7)));
+		break;
+	case 4:
+		/* 128 bits: s4..s7 are folded byte-wise into s0..s3. */
+		sph_enc32le(out,      SPH_T32(s0 + mix128(s7, s4, s5, s6, 24)));
+		sph_enc32le(out + 4,  SPH_T32(s1 + mix128(s6, s7, s4, s5, 16)));
+		sph_enc32le(out + 8,  SPH_T32(s2 + mix128(s5, s6, s7, s4, 8)));
+		sph_enc32le(out + 12, SPH_T32(s3 + mix128(s4, s5, s6, s7, 0)));
+		break;
+	}
+}
+
+/*
+ * The main core functions inline the code with the COREx() macros. We
+ * use a helper file, included three times, which avoids code copying.
+ */
+
+#undef PASSES
+#define PASSES   3
+#include "haval_helper.c"
+
+#undef PASSES
+#define PASSES   4
+#include "haval_helper.c"
+
+#undef PASSES
+#define PASSES   5
+#include "haval_helper.c"
+
+/* ====================================================================== */
+
+/*
+ * API(xxx, y) defines the public entry points for HAVAL with an
+ * xxx-bit output and y passes:
+ *
+ *   sph_haval<xxx>_<y>_init               reset the context; the output
+ *                                         length is passed as xxx / 32
+ *                                         words
+ *   sph_haval<xxx>_<y>                    feed data
+ *   sph_haval<xxx>_<y>_close              finish with no extra bits
+ *   sph_haval<xxx>_<y>_addbits_and_close  finish with n extra bits
+ *
+ * The last function is additionally emitted under the name
+ * sph_haval<xxx>_<y>addbits_and_close (no underscore before "addbits").
+ * That spelling is what this macro used to generate; it does not follow
+ * the pattern of the other three entry points, so the underscored name
+ * is now provided as well and the old symbol is kept for any code that
+ * links against it.
+ * NOTE(review): the underscored name is presumably the one declared in
+ * sph_haval.h -- confirm against the header.
+ */
+#define API(xxx, y) \
+void \
+sph_haval ## xxx ## _ ## y ## _init(void *cc) \
+{ \
+	haval_init(cc, xxx >> 5, y); \
+} \
+ \
+void \
+sph_haval ## xxx ## _ ## y (void *cc, const void *data, size_t len) \
+{ \
+	haval ## y(cc, data, len); \
+} \
+ \
+void \
+sph_haval ## xxx ## _ ## y ## _close(void *cc, void *dst) \
+{ \
+	haval ## y ## _close(cc, 0, 0, dst); \
+} \
+ \
+void \
+sph_haval ## xxx ## _ ## y ## _addbits_and_close( \
+	void *cc, unsigned ub, unsigned n, void *dst) \
+{ \
+	haval ## y ## _close(cc, ub, n, dst); \
+} \
+ \
+void \
+sph_haval ## xxx ## _ ## y ## addbits_and_close( \
+	void *cc, unsigned ub, unsigned n, void *dst) \
+{ \
+	haval ## y ## _close(cc, ub, n, dst); \
+}
+
+/* One instantiation per (output size, pass count) combination. */
+API(128, 3)
+API(128, 4)
+API(128, 5)
+API(160, 3)
+API(160, 4)
+API(160, 5)
+API(192, 3)
+API(192, 4)
+API(192, 5)
+API(224, 3)
+API(224, 4)
+API(224, 5)
+API(256, 3)
+API(256, 4)
+API(256, 5)
+
+/*
+ * RVAL loads the state variables "s0" to "s7" from the eight-word array
+ * "val", which must be in scope.
+ */
+#define RVAL   do { \
+		s0 = val[0]; \
+		s1 = val[1]; \
+		s2 = val[2]; \
+		s3 = val[3]; \
+		s4 = val[4]; \
+		s5 = val[5]; \
+		s6 = val[6]; \
+		s7 = val[7]; \
+	} while (0)
+
+/*
+ * WVAL stores the state variables "s0" to "s7" back into the array
+ * "val"; it is the inverse of RVAL.
+ */
+#define WVAL   do { \
+		val[0] = s0; \
+		val[1] = s1; \
+		val[2] = s2; \
+		val[3] = s3; \
+		val[4] = s4; \
+		val[5] = s5; \
+		val[6] = s6; \
+		val[7] = s7; \
+	} while (0)
+
+/* Input accessor for the COREn macros: word "i" of the array "msg". */
+#define INMSG(i)   msg[i]
+
+/*
+ * see sph_haval.h
+ *
+ * Applies the 3-pass core (CORE3) to the 32-word block "msg": the eight
+ * state words are read from "val" and the updated state is written back
+ * into "val". The message words are used as given; no decoding is done
+ * here.
+ */
+void
+sph_haval_3_comp(const sph_u32 msg[32], sph_u32 val[8])
+{
+	DSTATE;
+
+	RVAL;
+	CORE3(INMSG);
+	WVAL;
+}
+
+/*
+ * see sph_haval.h
+ *
+ * Applies the 4-pass core (CORE4) to the 32-word block "msg": the eight
+ * state words are read from "val" and the updated state is written back
+ * into "val".
+ */
+void
+sph_haval_4_comp(const sph_u32 msg[32], sph_u32 val[8])
+{
+	DSTATE;
+
+	RVAL;
+	CORE4(INMSG);
+	WVAL;
+}
+
+/*
+ * see sph_haval.h
+ *
+ * Applies the 5-pass core (CORE5) to the 32-word block "msg": the eight
+ * state words are read from "val" and the updated state is written back
+ * into "val".
+ */
+void
+sph_haval_5_comp(const sph_u32 msg[32], sph_u32 val[8])
+{
+	DSTATE;
+
+	RVAL;
+	CORE5(INMSG);
+	WVAL;
+}
+
+#ifdef __cplusplus
+}
+#endif	
diff --git a/sph/haval_helper.c b/sph/haval_helper.c
new file mode 100644
index 0000000000..a8fe917eb3
--- /dev/null
+++ b/sph/haval_helper.c
@@ -0,0 +1,190 @@
+/* $Id: haval_helper.c 218 2010-06-08 17:06:34Z tp $ */
+/*
+ * Helper code, included (three times !) by HAVAL implementation.
+ *
+ * TODO: try to merge this with md_helper.c.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+/*
+ * Token pasting in two steps: going through SPH_XCAT_ lets the
+ * arguments (PASSES in particular) be macro-expanded before they are
+ * concatenated. Both macros are redefined on every inclusion of this
+ * file.
+ */
+#undef SPH_XCAT
+#define SPH_XCAT(a, b)    SPH_XCAT_(a, b)
+#undef SPH_XCAT_
+#define SPH_XCAT_(a, b)   a ## b
+
+/*
+ * Buffered update routine. It is named haval<PASSES>_short when
+ * SPH_UPTR is defined and haval<PASSES> otherwise. Input bytes are
+ * copied into sc->buf; each time the buffer holds 128 bytes it is run
+ * through CORE<PASSES> and the resulting state is written back to the
+ * context. The byte counter in the context is advanced by the number
+ * of bytes consumed on every loop iteration.
+ */
+static void
+#ifdef SPH_UPTR
+SPH_XCAT(SPH_XCAT(haval, PASSES), _short)
+#else
+SPH_XCAT(haval, PASSES)
+#endif
+(sph_haval_context *sc, const void *data, size_t len)
+{
+	unsigned current;
+
+	/* Fill level of sc->buf: the byte count modulo the block size. */
+#if SPH_64
+	current = (unsigned)sc->count & 127U;
+#else
+	current = (unsigned)sc->count_low & 127U;
+#endif
+	while (len > 0) {
+		unsigned clen;
+#if !SPH_64
+		sph_u32 clow, clow2;
+#endif
+		
+		/* Take as much as fits in the buffer, capped by the input. */
+		clen = 128U - current;
+		if (clen > len)
+			clen = len;
+		memcpy(sc->buf + current, data, clen);
+		data = (const unsigned char *)data + clen;
+		current += clen;
+		len -= clen;
+		if (current == 128U) {
+			/* Full block: process it and start a fresh buffer. */
+			DSTATE;
+			IN_PREPARE(sc->buf);
+			RSTATE;
+			SPH_XCAT(CORE, PASSES)(INW);
+			WSTATE;
+			current = 0;
+		}
+#if SPH_64
+		sc->count += clen;
+#else
+		/* Two-word counter: carry into the high word on wrap-around. */
+		clow = sc->count_low;
+		clow2 = SPH_T32(clow + clen);
+		sc->count_low = clow2;
+		if (clow2 < clow)
+			sc->count_high ++;
+#endif
+	}
+}
+
+#ifdef SPH_UPTR
+/*
+ * Update routine used when SPH_UPTR is defined. Inputs shorter than
+ * 256 bytes are handed to the buffered "_short" routine. Otherwise a
+ * partially filled buffer is completed first (through "_short"), then
+ * whole 128-byte blocks are processed straight from the caller's data
+ * and the remaining tail (fewer than 128 bytes) is copied into
+ * sc->buf. Unless SPH_UNALIGNED is set, data that is not 4-byte aligned
+ * at that point is handed to "_short" as well.
+ */
+static void
+SPH_XCAT(haval, PASSES)(sph_haval_context *sc, const void *data, size_t len)
+{
+	unsigned current;
+	size_t orig_len;
+#if !SPH_64
+	sph_u32 clow, clow2;
+#endif
+	DSTATE;
+	
+	if (len < 256U) {
+		SPH_XCAT(SPH_XCAT(haval, PASSES), _short)(sc, data, len);
+		return;
+	}
+#if SPH_64
+	current = (unsigned)sc->count & 127U;
+#else
+	current = (unsigned)sc->count_low & 127U;
+#endif
+	if (current > 0) {
+		unsigned clen;
+		/* Complete the pending block so the buffer is empty below. */
+		clen = 128U - current;
+		SPH_XCAT(SPH_XCAT(haval, PASSES), _short)(sc, data, clen);
+		data = (const unsigned char *)data + clen;
+		len -= clen;
+	}
+#if !SPH_UNALIGNED
+	if (((SPH_UPTR)data & 3U) != 0) {
+		SPH_XCAT(SPH_XCAT(haval, PASSES), _short)(sc, data, len);
+		return;
+	}
+#endif
+	/* Remember the amount handled here; the counter is updated once. */
+	orig_len = len;
+	RSTATE;
+	while (len >= 128U) {
+		IN_PREPARE(data);
+		SPH_XCAT(CORE, PASSES)(INW);
+		data = (const unsigned char *)data + 128U;
+		len -= 128U;
+	}
+	WSTATE;
+	if (len > 0)
+		memcpy(sc->buf, data, len);
+#if SPH_64
+	sc->count += (sph_u64)orig_len;
+#else
+	clow = sc->count_low;
+	clow2 = SPH_T32(clow + orig_len);
+	sc->count_low = clow2;
+	if (clow2 < clow)
+		sc->count_high ++;
+	/*
+	 * Add the part of orig_len above bit 31 to the high word. The
+	 * 32-bit shift is done in three steps, presumably so that no
+	 * single shift reaches the full width of a 32-bit size_t.
+	 */
+	orig_len >>= 12;
+	orig_len >>= 10;
+	orig_len >>= 10;
+	sc->count_high += orig_len;
+#endif
+}
+#endif
+
+/*
+ * Finalization for the PASSES-pass variant (named haval<PASSES>_close).
+ *
+ * sc   context holding the buffered input and running state
+ * ub   extra bits; the n most significant bits of its low byte are used
+ * n    number of extra bits
+ * dst  receives the digest (written by haval_out)
+ *
+ * A padding byte combining the extra bits with a marker bit is appended
+ * to the buffered data. The block is then zero-filled up to offset 118,
+ * followed by a byte holding 0x01 | (PASSES << 3), a byte holding
+ * olen << 3, and the message length in bits over bytes 120..127. If the
+ * padding byte leaves more than 118 bytes in the buffer, that block is
+ * zero-filled and processed first and the trailer goes into a fresh
+ * block. After the digest is written the context is reinitialized with
+ * the same output length and pass count.
+ */
+static void
+SPH_XCAT(SPH_XCAT(haval, PASSES), _close)(sph_haval_context *sc,
+	unsigned ub, unsigned n, void *dst)
+{
+	unsigned current;
+	DSTATE;
+
+	/* Fill level of sc->buf: the byte count modulo the block size. */
+#if SPH_64
+	current = (unsigned)sc->count & 127U;
+#else
+	current = (unsigned)sc->count_low & 127U;
+#endif
+	sc->buf[current ++] = (0x01 << n) | ((ub & 0xFF) >> (8 - n));
+	RSTATE;
+	if (current > 118U) {
+		/* No room left for the 10-byte trailer: flush this block. */
+		memset(sc->buf + current, 0, 128U - current);
+
+		do {
+			IN_PREPARE(sc->buf);
+			SPH_XCAT(CORE, PASSES)(INW);
+		} while (0);
+		current = 0;
+	}
+	memset(sc->buf + current, 0, 118U - current);
+	sc->buf[118] = 0x01 | (PASSES << 3);
+	sc->buf[119] = sc->olen << 3;
+#if SPH_64
+	sph_enc64le_aligned(sc->buf + 120, SPH_T64(sc->count << 3));
+#else
+	/* Bit length from the two-word byte counter. */
+	sph_enc32le_aligned(sc->buf + 120, SPH_T32(sc->count_low << 3));
+	sph_enc32le_aligned(sc->buf + 124,
+		SPH_T32((sc->count_high << 3) | (sc->count_low >> 29)));
+#endif
+
+	/* The do/while gives IN_PREPARE's declarations their own scope. */
+	do {
+		IN_PREPARE(sc->buf);
+		SPH_XCAT(CORE, PASSES)(INW);
+	} while (0);
+	WSTATE;
+
+	haval_out(sc, dst);
+	haval_init(sc, sc->olen, sc->passes);
+}
diff --git a/sph/md_helper.c b/sph/md_helper.c
new file mode 100644
index 0000000000..5384f03f73
--- /dev/null
+++ b/sph/md_helper.c
@@ -0,0 +1,346 @@
+/* $Id: md_helper.c 216 2010-06-08 09:46:57Z tp $ */
+/*
+ * This file contains some functions which implement the external data
+ * handling and padding for Merkle-Damgard hash functions which follow
+ * the conventions set out by MD4 (little-endian) or SHA-1 (big-endian).
+ *
+ * API: this file is meant to be included, not compiled as a stand-alone
+ * file. Some macros must be defined:
+ *   RFUN   name for the round function
+ *   HASH   "short name" for the hash function
+ *   BE32   defined for big-endian, 32-bit based (e.g. SHA-1)
+ *   LE32   defined for little-endian, 32-bit based (e.g. MD5)
+ *   BE64   defined for big-endian, 64-bit based (e.g. SHA-512)
+ *   LE64   defined for little-endian, 64-bit based (no example yet)
+ *   PW01   if defined, append 0x01 instead of 0x80 (for Tiger)
+ *   BLEN   if defined, length of a message block (in bytes)
+ *   PLW1   if defined, length is defined on one 64-bit word only (for Tiger)
+ *   PLW4   if defined, length is defined on four 64-bit words (for WHIRLPOOL)
+ *   SVAL   if defined, reference to the context state information
+ *
+ * BLEN is used when a message block is not 16 (32-bit or 64-bit) words:
+ * this is used for instance for Tiger, which works on 64-bit words but
+ * uses 512-bit message blocks (eight 64-bit words). PLW1 and PLW4 are
+ * ignored if 32-bit words are used; if 64-bit words are used and PLW1 is
+ * set, then only one word (64 bits) will be used to encode the input
+ * message length (in bits), otherwise two words will be used (as in
+ * SHA-384 and SHA-512). If 64-bit words are used and PLW4 is defined (but
+ * not PLW1), four 64-bit words will be used to encode the message length
+ * (in bits). Note that regardless of those settings, only 64-bit message
+ * lengths are supported (in bits): messages longer than 2 Exabytes will be
+ * improperly hashed (this is unlikely to happen soon: 2 Exabytes is about
+ * 2 million Terabytes, which is huge).
+ *
+ * If CLOSE_ONLY is defined, then this file defines only the sph_XXX_close()
+ * function. This is used for Tiger2, which is identical to Tiger except
+ * when it comes to the padding (Tiger2 uses the standard 0x80 byte instead
+ * of the 0x01 from original Tiger).
+ *
+ * The RFUN function is invoked with two arguments, the first pointing to
+ * aligned data (as a "const void *"), the second being state information
+ * from the context structure. By default, this state information is the
+ * "val" field from the context, and this field is assumed to be an array
+ * of words ("sph_u32" or "sph_u64", depending on BE32/LE32/BE64/LE64).
+ * The "val" field can have any type, except
+ * for the output encoding which assumes that it is an array of "sph_u32"
+ * values. By defining NO_OUTPUT, this last step is deactivated; the
+ * includer code is then responsible for writing out the hash result. When
+ * NO_OUTPUT is defined, the third parameter to the "close()" function is
+ * ignored.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+/*
+ * MSVC only: silence warning C4146 (unary minus applied to an unsigned
+ * type). The padding code below negates an unsigned value on purpose to
+ * build a bit mask.
+ */
+#ifdef _MSC_VER
+#pragma warning (disable: 4146)
+#endif
+
+/*
+ * Token pasting in two steps: going through SPH_XCAT_ lets the
+ * arguments (HASH in particular) be macro-expanded before they are
+ * concatenated.
+ */
+#undef SPH_XCAT
+#define SPH_XCAT(a, b)     SPH_XCAT_(a, b)
+#undef SPH_XCAT_
+#define SPH_XCAT_(a, b)    a ## b
+
+/*
+ * SPH_BLEN is the message block length and SPH_WLEN the word length,
+ * both in bytes: 128 and 8 for the 64-bit variants (BE64 or LE64), 64
+ * and 4 otherwise. If the includer defines BLEN, it replaces the block
+ * length.
+ */
+#undef SPH_BLEN
+#undef SPH_WLEN
+#if defined BE64 || defined LE64
+#define SPH_BLEN    128U
+#define SPH_WLEN      8U
+#else
+#define SPH_BLEN     64U
+#define SPH_WLEN      4U
+#endif
+
+#ifdef BLEN
+#undef SPH_BLEN
+#define SPH_BLEN    BLEN
+#endif
+
+/*
+ * SPH_MAXPAD is the offset within the final block at which the encoded
+ * message length starts: the block length minus one word (PLW1), four
+ * words (PLW4) or two words (default).
+ */
+#undef SPH_MAXPAD
+#if defined PLW1
+#define SPH_MAXPAD   (SPH_BLEN - SPH_WLEN)
+#elif defined PLW4
+#define SPH_MAXPAD   (SPH_BLEN - (SPH_WLEN << 2))
+#else
+#define SPH_MAXPAD   (SPH_BLEN - (SPH_WLEN << 1))
+#endif
+
+/*
+ * SPH_VAL is the state expression passed to RFUN. When the includer
+ * defines SVAL, that expression is used and SPH_NO_OUTPUT is set, which
+ * turns off the output encoding in the close function. Otherwise the
+ * "val" field of the context is used.
+ */
+#undef SPH_VAL
+#undef SPH_NO_OUTPUT
+#ifdef SVAL
+#define SPH_VAL         SVAL
+#define SPH_NO_OUTPUT   1
+#else
+#define SPH_VAL   sc->val
+#endif
+
+#ifndef CLOSE_ONLY
+
+/*
+ * Buffered update routine. It is the static helper <HASH>_short when
+ * SPH_UPTR is defined, and the public sph_<HASH> otherwise. Input bytes
+ * are copied into sc->buf; each time the buffer holds SPH_BLEN bytes,
+ * RFUN is invoked on it. The byte counter in the context is advanced by
+ * the number of bytes consumed on every loop iteration.
+ */
+#ifdef SPH_UPTR
+static void
+SPH_XCAT(HASH, _short)(void *cc, const void *data, size_t len)
+#else
+void
+SPH_XCAT(sph_, HASH)(void *cc, const void *data, size_t len)
+#endif
+{
+	SPH_XCAT(sph_, SPH_XCAT(HASH, _context)) *sc;
+	unsigned current;
+
+	sc = cc;
+	/* Fill level of sc->buf: the byte count modulo the block size. */
+#if SPH_64
+	current = (unsigned)sc->count & (SPH_BLEN - 1U);
+#else
+	current = (unsigned)sc->count_low & (SPH_BLEN - 1U);
+#endif
+	while (len > 0) {
+		unsigned clen;
+#if !SPH_64
+		sph_u32 clow, clow2;
+#endif
+
+		/* Take as much as fits in the buffer, capped by the input. */
+		clen = SPH_BLEN - current;
+		if (clen > len)
+			clen = len;
+		memcpy(sc->buf + current, data, clen);
+		data = (const unsigned char *)data + clen;
+		current += clen;
+		len -= clen;
+		if (current == SPH_BLEN) {
+			RFUN(sc->buf, SPH_VAL);
+			current = 0;
+		}
+#if SPH_64
+		sc->count += clen;
+#else
+		/* Two-word counter: carry into the high word on wrap-around. */
+		clow = sc->count_low;
+		clow2 = SPH_T32(clow + clen);
+		sc->count_low = clow2;
+		if (clow2 < clow)
+			sc->count_high ++;
+#endif
+	}
+}
+
+#ifdef SPH_UPTR
+/*
+ * Public update routine used when SPH_UPTR is defined. Inputs shorter
+ * than two blocks are handed to the buffered <HASH>_short routine.
+ * Otherwise a partially filled buffer is completed first (through
+ * <HASH>_short), then RFUN is applied to whole blocks straight from the
+ * caller's data and the remaining tail is copied into sc->buf. Unless
+ * SPH_UNALIGNED is set, data that is not aligned on SPH_WLEN bytes at
+ * that point is handed to <HASH>_short as well.
+ */
+void
+SPH_XCAT(sph_, HASH)(void *cc, const void *data, size_t len)
+{
+	SPH_XCAT(sph_, SPH_XCAT(HASH, _context)) *sc;
+	unsigned current;
+	size_t orig_len;
+#if !SPH_64
+	sph_u32 clow, clow2;
+#endif
+
+	if (len < (2 * SPH_BLEN)) {
+		SPH_XCAT(HASH, _short)(cc, data, len);
+		return;
+	}
+	sc = cc;
+#if SPH_64
+	current = (unsigned)sc->count & (SPH_BLEN - 1U);
+#else
+	current = (unsigned)sc->count_low & (SPH_BLEN - 1U);
+#endif
+	if (current > 0) {
+		unsigned t;
+
+		/* Complete the pending block so the buffer is empty below. */
+		t = SPH_BLEN - current;
+		SPH_XCAT(HASH, _short)(cc, data, t);
+		data = (const unsigned char *)data + t;
+		len -= t;
+	}
+#if !SPH_UNALIGNED
+	if (((SPH_UPTR)data & (SPH_WLEN - 1U)) != 0) {
+		SPH_XCAT(HASH, _short)(cc, data, len);
+		return;
+	}
+#endif
+	/* Remember the amount handled here; the counter is updated once. */
+	orig_len = len;
+	while (len >= SPH_BLEN) {
+		RFUN(data, SPH_VAL);
+		len -= SPH_BLEN;
+		data = (const unsigned char *)data + SPH_BLEN;
+	}
+	if (len > 0)
+		memcpy(sc->buf, data, len);
+#if SPH_64
+	sc->count += (sph_u64)orig_len;
+#else
+	clow = sc->count_low;
+	clow2 = SPH_T32(clow + orig_len);
+	sc->count_low = clow2;
+	if (clow2 < clow)
+		sc->count_high ++;
+	/*
+	 * This code handles the improbable situation where "size_t" is
+	 * greater than 32 bits, and yet we do not have a 64-bit type.
+	 */
+	orig_len >>= 12;
+	orig_len >>= 10;
+	orig_len >>= 10;
+	sc->count_high += orig_len;
+#endif
+}
+#endif
+
+#endif
+
+/*
+ * Perform padding and produce result. The context is NOT reinitialized
+ * by this function.
+ *
+ * cc    context (a sph_<HASH>_context)
+ * ub    extra bits; the n most significant bits of its low byte are used
+ * n     number of extra bits
+ * dst   output buffer (ignored when SPH_NO_OUTPUT is defined)
+ * rnum  number of state words written to dst (ignored when
+ *       SPH_NO_OUTPUT is defined)
+ *
+ * The padding byte (extra bits followed by a marker bit) is appended to
+ * the buffered data and the block is zero-filled up to SPH_MAXPAD. If
+ * the padding byte already reaches past SPH_MAXPAD, the current block
+ * is completed with zeros and processed, and the length goes into a
+ * fresh block. The message length in bits is then encoded from
+ * SPH_MAXPAD to the end of the block, in the byte order and word count
+ * selected by BE32/LE32/BE64/LE64 and PLW1/PLW4, and the block is
+ * processed with RFUN.
+ */
+static void
+SPH_XCAT(HASH, _addbits_and_close)(void *cc,
+	unsigned ub, unsigned n, void *dst, unsigned rnum)
+{
+	SPH_XCAT(sph_, SPH_XCAT(HASH, _context)) *sc;
+	unsigned current, u;
+#if !SPH_64
+	sph_u32 low, high;
+#endif
+
+	sc = cc;
+	/* Fill level of sc->buf: the byte count modulo the block size. */
+#if SPH_64
+	current = (unsigned)sc->count & (SPH_BLEN - 1U);
+#else
+	current = (unsigned)sc->count_low & (SPH_BLEN - 1U);
+#endif
+#ifdef PW01
+	sc->buf[current ++] = (0x100 | (ub & 0xFF)) >> (8 - n);
+#else
+	{
+		unsigned z;
+
+		/* -z masks the bits at and above the marker position. */
+		z = 0x80 >> n;
+		sc->buf[current ++] = ((ub & -z) | z) & 0xFF;
+	}
+#endif
+	if (current > SPH_MAXPAD) {
+		/* No room left for the length field: flush this block. */
+		memset(sc->buf + current, 0, SPH_BLEN - current);
+		RFUN(sc->buf, SPH_VAL);
+		memset(sc->buf, 0, SPH_MAXPAD);
+	} else {
+		memset(sc->buf + current, 0, SPH_MAXPAD - current);
+	}
+#if defined BE64
+#if defined PLW1
+	sph_enc64be_aligned(sc->buf + SPH_MAXPAD,
+		SPH_T64(sc->count << 3) + (sph_u64)n);
+#elif defined PLW4
+	memset(sc->buf + SPH_MAXPAD, 0, 2 * SPH_WLEN);
+	sph_enc64be_aligned(sc->buf + SPH_MAXPAD + 2 * SPH_WLEN,
+		sc->count >> 61);
+	sph_enc64be_aligned(sc->buf + SPH_MAXPAD + 3 * SPH_WLEN,
+		SPH_T64(sc->count << 3) + (sph_u64)n);
+#else
+	sph_enc64be_aligned(sc->buf + SPH_MAXPAD, sc->count >> 61);
+	sph_enc64be_aligned(sc->buf + SPH_MAXPAD + SPH_WLEN,
+		SPH_T64(sc->count << 3) + (sph_u64)n);
+#endif
+#elif defined LE64
+#if defined PLW1
+	sph_enc64le_aligned(sc->buf + SPH_MAXPAD,
+		SPH_T64(sc->count << 3) + (sph_u64)n);
+#elif defined PLW4
+	/*
+	 * Four length words, little-endian mirror of the BE64/PLW4 case:
+	 * with PLW4 SPH_MAXPAD leaves room for four words, so the upper
+	 * two must be cleared explicitly.
+	 */
+	sph_enc64le_aligned(sc->buf + SPH_MAXPAD,
+		SPH_T64(sc->count << 3) + (sph_u64)n);
+	sph_enc64le_aligned(sc->buf + SPH_MAXPAD + SPH_WLEN, sc->count >> 61);
+	memset(sc->buf + SPH_MAXPAD + 2 * SPH_WLEN, 0, 2 * SPH_WLEN);
+#else
+	sph_enc64le_aligned(sc->buf + SPH_MAXPAD,
+		SPH_T64(sc->count << 3) + (sph_u64)n);
+	sph_enc64le_aligned(sc->buf + SPH_MAXPAD + SPH_WLEN, sc->count >> 61);
+#endif
+#else
+#if SPH_64
+#ifdef BE32
+	sph_enc64be_aligned(sc->buf + SPH_MAXPAD,
+		SPH_T64(sc->count << 3) + (sph_u64)n);
+#else
+	sph_enc64le_aligned(sc->buf + SPH_MAXPAD,
+		SPH_T64(sc->count << 3) + (sph_u64)n);
+#endif
+#else
+	/* Bit length from the two-word byte counter. */
+	low = sc->count_low;
+	high = SPH_T32((sc->count_high << 3) | (low >> 29));
+	low = SPH_T32(low << 3) + (sph_u32)n;
+#ifdef BE32
+	sph_enc32be(sc->buf + SPH_MAXPAD, high);
+	sph_enc32be(sc->buf + SPH_MAXPAD + SPH_WLEN, low);
+#else
+	sph_enc32le(sc->buf + SPH_MAXPAD, low);
+	sph_enc32le(sc->buf + SPH_MAXPAD + SPH_WLEN, high);
+#endif
+#endif
+#endif
+	RFUN(sc->buf, SPH_VAL);
+#ifdef SPH_NO_OUTPUT
+	/* The includer writes the result itself; silence unused warnings. */
+	(void)dst;
+	(void)rnum;
+	(void)u;
+#else
+	for (u = 0; u < rnum; u ++) {
+#if defined BE64
+		sph_enc64be((unsigned char *)dst + 8 * u, sc->val[u]);
+#elif defined LE64
+		sph_enc64le((unsigned char *)dst + 8 * u, sc->val[u]);
+#elif defined BE32
+		sph_enc32be((unsigned char *)dst + 4 * u, sc->val[u]);
+#else
+		sph_enc32le((unsigned char *)dst + 4 * u, sc->val[u]);
+#endif
+	}
+#endif
+}
+
+/*
+ * Close with no extra bits: forwards to <HASH>_addbits_and_close with
+ * ub = 0 and n = 0. Like that function, it does not reinitialize the
+ * context.
+ */
+static void
+SPH_XCAT(HASH, _close)(void *cc, void *dst, unsigned rnum)
+{
+	SPH_XCAT(HASH, _addbits_and_close)(cc, 0, 0, dst, rnum);
+}
diff --git a/sph/neoscrypt.c b/sph/neoscrypt.c
new file mode 100644
index 0000000000..202eee36fe
--- /dev/null
+++ b/sph/neoscrypt.c
@@ -0,0 +1,967 @@
+/*
+ * Copyright (c) 2009 Colin Percival, 2011 ArtForz
+ * Copyright (c) 2012 Andrew Moon (floodyberry)
+ * Copyright (c) 2012 Samuel Neves <sneves@dei.uc.pt>
+ * Copyright (c) 2014 John Doering <ghostlander@phoenixcoin.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "neoscrypt.h"
+
+
+/* Short integer aliases used throughout this file.
+ * ulong is the word type used by the block copy / swap / XOR helpers below. */
+#if (WINDOWS)
+/* sizeof(unsigned long) = 4 for MinGW64 */
+typedef unsigned long long ulong;
+#else
+typedef unsigned long ulong;
+#endif
+typedef unsigned int  uint;
+typedef unsigned char uchar;
+/* NOTE(review): this is a plain unsigned int alias, not C99 _Bool / <stdbool.h> */
+typedef unsigned int  bool;
+
+
+/* Smaller / larger of two values.
+ * Every use of an argument is parenthesised so that any expression can be
+ * passed safely; note that the selected argument is evaluated twice, so avoid
+ * arguments with side effects that must happen only once. */
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+
+
+/* SHA-256 */
+
+/* Per-step additive constants of the SHA-256 compression function;
+ * entry i is added in step i (see the STEP macro). */
+static const uint32_t sha256_constants[64] = {
+    0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
+    0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
+    0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
+    0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
+    0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
+    0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
+    0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
+    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+};
+
+/* SHA-256 round primitives.
+ * Arguments are not parenthesised inside the macros, so pass simple
+ * operands only (the code below passes array elements). */
+/* Choose: bits of y where x is set, bits of z elsewhere */
+#define Ch(x,y,z)  (z ^ (x & (y ^ z)))
+/* Majority of the three inputs, bit by bit */
+#define Maj(x,y,z) (((x | y) & z) | (x & y))
+/* Rotation/XOR mixers applied to the working registers (S0, S1) ... */
+#define S0(x)      (ROTR32(x,  2) ^ ROTR32(x, 13) ^ ROTR32(x, 22))
+#define S1(x)      (ROTR32(x,  6) ^ ROTR32(x, 11) ^ ROTR32(x, 25))
+/* ... and to the message schedule (G0, G1) */
+#define G0(x)      (ROTR32(x,  7) ^ ROTR32(x, 18) ^ (x >>  3))
+#define G1(x)      (ROTR32(x, 17) ^ ROTR32(x, 19) ^ (x >> 10))
+/* Schedule word i (0..15): big-endian 32-bit load from the input block */
+#define W0(in,i)   (U8TO32_BE(&in[i * 4]))
+/* Schedule word i (16..63), derived from earlier words of the local w[] */
+#define W1(i)      (G1(w[i - 2]) + w[i - 7] + G0(w[i - 15]) + w[i - 16])
+/* One compression step on the local registers r[0..7], using locals t0, t1
+ * and w[]. Expands to several statements: only use it inside a braced block. */
+#define STEP(i) \
+    t1 = S0(r[0]) + Maj(r[0], r[1], r[2]); \
+    t0 = r[7] + S1(r[4]) + Ch(r[4], r[5], r[6]) + sha256_constants[i] + w[i]; \
+    r[7] = r[6]; \
+    r[6] = r[5]; \
+    r[5] = r[4]; \
+    r[4] = r[3] + t0; \
+    r[3] = r[2]; \
+    r[2] = r[1]; \
+    r[1] = r[0]; \
+    r[0] = t0 + t1;
+
+
+/* Streaming SHA-256 context */
+typedef struct sha256_hash_state_t {
+    uint32_t H[8];      /* chaining value, updated by sha256_blocks() */
+    uint64_t T;         /* number of bits compressed so far */
+    uint32_t leftover;  /* number of bytes currently held in buffer */
+    uint8_t buffer[SCRYPT_HASH_BLOCK_SIZE];  /* partial input block */
+} sha256_hash_state;
+
+
+/*
+ * Run the SHA-256 compression function over `blocks` consecutive blocks of
+ * SCRYPT_HASH_BLOCK_SIZE bytes starting at `in`. Updates the chaining value
+ * S->H and adds the number of processed bits to S->T.
+ */
+static void sha256_blocks(sha256_hash_state *S, const uint8_t *in, size_t blocks) {
+    uint32_t r[8], w[64], t0, t1;
+    size_t i;
+
+    /* Loaded once: after each block r[] is left equal to the new S->H */
+    for(i = 0; i < 8; i++)
+      r[i] = S->H[i];
+
+    while(blocks--) {
+        /* Message schedule: 16 big-endian input words, then 48 derived words */
+        for(i =  0; i < 16; i++) {
+            w[i] = W0(in, i);
+        }
+        for(i = 16; i < 64; i++) {
+            w[i] = W1(i);
+        }
+        /* 64 compression steps on the working registers */
+        for(i =  0; i < 64; i++) {
+            STEP(i);
+        }
+        /* Feed-forward: add the previous chaining value and store the result */
+        for(i =  0; i <  8; i++) {
+            r[i] += S->H[i];
+            S->H[i] = r[i];
+        }
+        S->T += SCRYPT_HASH_BLOCK_SIZE * 8;
+        in += SCRYPT_HASH_BLOCK_SIZE;
+    }
+}
+
+/* Reset a SHA-256 context: initial chaining value, no bits processed and
+ * nothing buffered. */
+static void neoscrypt_hash_init_sha256(sha256_hash_state *S) {
+    static const uint32_t initial_state[8] = {
+        0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
+        0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
+    };
+    size_t word;
+
+    S->leftover = 0;
+    S->T = 0;
+    for(word = 0; word < 8; word++)
+      S->H[word] = initial_state[word];
+}
+
+/*
+ * Absorb inlen bytes from in into the SHA-256 context.
+ * Complete blocks are compressed immediately; any remainder (fewer than
+ * SCRYPT_HASH_BLOCK_SIZE bytes) is kept in S->buffer for the next call.
+ */
+static void neoscrypt_hash_update_sha256(sha256_hash_state *S, const uint8_t *in, size_t inlen) {
+    size_t room, take, whole_bytes, tail;
+
+    /* First top up a partially filled buffer left by an earlier call */
+    if(S->leftover != 0) {
+        room = (SCRYPT_HASH_BLOCK_SIZE - S->leftover);
+        take = (inlen < room) ? inlen : room;
+        memcpy(&S->buffer[S->leftover], in, take);
+        S->leftover += (uint32_t)take;
+        /* Still not a full block: everything has been consumed */
+        if(S->leftover < SCRYPT_HASH_BLOCK_SIZE)
+          return;
+        sha256_blocks(S, S->buffer, 1);
+        in += take;
+        inlen -= take;
+    }
+
+    /* Compress all complete blocks straight from the caller's memory */
+    tail = inlen % SCRYPT_HASH_BLOCK_SIZE;
+    whole_bytes = inlen - tail;
+    if(whole_bytes != 0) {
+        sha256_blocks(S, in, whole_bytes / SCRYPT_HASH_BLOCK_SIZE);
+        in += whole_bytes;
+    }
+
+    /* Stash whatever is left over */
+    S->leftover = (uint32_t)tail;
+    if(tail != 0)
+      memcpy(S->buffer, in, tail);
+}
+
+/*
+ * Apply the final padding (0x80, zero fill, 64-bit big-endian bit length),
+ * compress the last block(s) and write the 32-byte big-endian digest to hash.
+ */
+static void neoscrypt_hash_finish_sha256(sha256_hash_state *S, uint8_t *hash) {
+    /* Total message length in bits, captured before the padding blocks
+     * advance S->T */
+    const uint64_t total_bits = S->T + (S->leftover * 8);
+    uint32_t used = S->leftover;
+    size_t word;
+
+    S->buffer[used] = 0x80;
+    used++;
+
+    if(used > 56) {
+        /* No room left for the length field: pad out this block, compress
+         * it and continue with an all-zero one */
+        memset(&S->buffer[used], 0, 64 - used);
+        sha256_blocks(S, S->buffer, 1);
+        memset(S->buffer, 0, 56);
+    } else {
+        memset(&S->buffer[used], 0, 56 - used);
+    }
+
+    U64TO8_BE(S->buffer + 56, total_bits);
+    sha256_blocks(S, S->buffer, 1);
+
+    /* Braces are required: U32TO8_BE expands to several statements */
+    for(word = 0; word < 8; word++) {
+        U32TO8_BE(&hash[4 * word], S->H[word]);
+    }
+}
+
+/* One-shot SHA-256: writes the digest of the mlen bytes at m into hash
+ * (SCRYPT_HASH_DIGEST_SIZE bytes). */
+static void neoscrypt_hash_sha256(hash_digest hash, const uint8_t *m, size_t mlen) {
+    sha256_hash_state st;
+    neoscrypt_hash_init_sha256(&st);
+    neoscrypt_hash_update_sha256(&st, m, mlen);
+    neoscrypt_hash_finish_sha256(&st, hash);
+}
+
+
+/* HMAC for SHA-256 */
+
+/* HMAC context: two SHA-256 contexts, primed by neoscrypt_hmac_init_sha256()
+ * with the key XOR 0x36 (inner) and the key XOR 0x5c (outer). */
+typedef struct sha256_hmac_state_t {
+    sha256_hash_state inner, outer;
+} sha256_hmac_state;
+
+/*
+ * Start an HMAC-SHA256 computation keyed with key[0..keylen).
+ * On return the inner context has absorbed (key ^ 0x36) and the outer
+ * context (key ^ 0x5c), each padded to one hash block.
+ */
+static void neoscrypt_hmac_init_sha256(sha256_hmac_state *st, const uint8_t *key, size_t keylen) {
+    uint8_t key_block[SCRYPT_HASH_BLOCK_SIZE] = {0};
+    uint8_t ipad[SCRYPT_HASH_BLOCK_SIZE];
+    uint8_t opad[SCRYPT_HASH_BLOCK_SIZE];
+    size_t n;
+
+    neoscrypt_hash_init_sha256(&st->inner);
+    neoscrypt_hash_init_sha256(&st->outer);
+
+    /* Keys longer than one block are replaced by their digest; shorter
+     * keys are used as is. Either way the rest of key_block stays zero. */
+    if(keylen > SCRYPT_HASH_BLOCK_SIZE)
+      neoscrypt_hash_sha256(key_block, key, keylen);
+    else
+      memcpy(key_block, key, keylen);
+
+    /* Derive both pads in one pass */
+    for(n = 0; n < SCRYPT_HASH_BLOCK_SIZE; n++) {
+        ipad[n] = key_block[n] ^ 0x36;
+        opad[n] = key_block[n] ^ 0x5c;
+    }
+
+    /* h(inner || ...) */
+    neoscrypt_hash_update_sha256(&st->inner, ipad, SCRYPT_HASH_BLOCK_SIZE);
+    /* h(outer || ...) */
+    neoscrypt_hash_update_sha256(&st->outer, opad, SCRYPT_HASH_BLOCK_SIZE);
+}
+
+/* Feed mlen message bytes into the HMAC; only the inner hash sees the
+ * message data. */
+static void neoscrypt_hmac_update_sha256(sha256_hmac_state *st, const uint8_t *m, size_t mlen) {
+    /* h(inner || m...) */
+    neoscrypt_hash_update_sha256(&st->inner, m, mlen);
+}
+
+/* Complete the HMAC and write the SCRYPT_HASH_DIGEST_SIZE-byte tag to mac.
+ * Both hash contexts in st are finalised by this call. */
+static void neoscrypt_hmac_finish_sha256(sha256_hmac_state *st, hash_digest mac) {
+    /* h(inner || m) */
+    hash_digest innerhash;
+    neoscrypt_hash_finish_sha256(&st->inner, innerhash);
+
+    /* h(outer || h(inner || m)) */
+    neoscrypt_hash_update_sha256(&st->outer, innerhash, sizeof(innerhash));
+    neoscrypt_hash_finish_sha256(&st->outer, mac);
+}
+
+
+/* PBKDF2 for SHA-256 */
+
+/*
+ * PBKDF2 with HMAC-SHA256.
+ *   password, password_len  HMAC key
+ *   salt, salt_len          salt
+ *   N                       iteration count; 0 is treated like 1
+ *   output, output_len      destination buffer and number of bytes to derive
+ *
+ * The iteration counter is 64 bits wide like N itself, so the loop also
+ * terminates for N == 0 and for N above 2^32, where a 32-bit counter compared
+ * against N - 1 would never reach the bound.
+ */
+static void neoscrypt_pbkdf2_sha256(const uint8_t *password, size_t password_len,
+  const uint8_t *salt, size_t salt_len, uint64_t N, uint8_t *output, size_t output_len) {
+    sha256_hmac_state hmac_pw, hmac_pw_salt, work;
+    hash_digest ti, u;
+    uint8_t be[4];
+    uint32_t i, k, blocks;
+    uint64_t j;
+    size_t chunk;
+
+    /* bytes must be <= (0xffffffff - (SCRYPT_HASH_DIGEST_SIZE - 1)), which they will always be under scrypt */
+
+    /* hmac(password, ...) */
+    neoscrypt_hmac_init_sha256(&hmac_pw, password, password_len);
+
+    /* hmac(password, salt...) */
+    hmac_pw_salt = hmac_pw;
+    neoscrypt_hmac_update_sha256(&hmac_pw_salt, salt, salt_len);
+
+    blocks = ((uint32_t)output_len + (SCRYPT_HASH_DIGEST_SIZE - 1)) / SCRYPT_HASH_DIGEST_SIZE;
+    for(i = 1; i <= blocks; i++) {
+        /* U1 = hmac(password, salt || be(i)) */
+        U32TO8_BE(be, i);
+        work = hmac_pw_salt;
+        neoscrypt_hmac_update_sha256(&work, be, 4);
+        neoscrypt_hmac_finish_sha256(&work, ti);
+        memcpy(u, ti, sizeof(u));
+
+        /* T[i] = U1 ^ U2 ^ U3... (N - 1 further iterations) */
+        for(j = 1; j < N; j++) {
+            /* UX = hmac(password, U{X-1}) */
+            work = hmac_pw;
+            neoscrypt_hmac_update_sha256(&work, u, SCRYPT_HASH_DIGEST_SIZE);
+            neoscrypt_hmac_finish_sha256(&work, u);
+
+            /* T[i] ^= UX */
+            for(k = 0; k < sizeof(u); k++)
+              ti[k] ^= u[k];
+        }
+
+        /* The last block may be partial; advance by what was actually
+         * written so that output_len never wraps around */
+        chunk = (output_len > SCRYPT_HASH_DIGEST_SIZE) ? SCRYPT_HASH_DIGEST_SIZE : output_len;
+        memcpy(output, ti, chunk);
+        output += chunk;
+        output_len -= chunk;
+    }
+}
+
+
+/* NeoScrypt */
+
+#if defined(ASM)
+
+/* Externally provided replacements for the portable C routines in the
+ * #else branch below (selected when ASM is defined). */
+extern void neoscrypt_salsa(uint *X, uint rounds);
+/* Called in neoscrypt() before and after the Salsa SMix stage */
+extern void neoscrypt_salsa_tangle(uint *X, uint count);
+extern void neoscrypt_chacha(uint *X, uint rounds);
+
+extern void neoscrypt_blkcpy(void *dstp, const void *srcp, uint len);
+extern void neoscrypt_blkswp(void *blkAp, void *blkBp, uint len);
+extern void neoscrypt_blkxor(void *dstp, const void *srcp, uint len);
+
+#else
+
+/* Salsa20, rounds must be a multiple of 2
+ *
+ * Permutes the 16-word block X in place: the words are copied to locals,
+ * run through rounds / 2 double rounds, and the result is added back onto X.
+ * The loop counts down by 2 until it hits exactly 0, so an odd round count
+ * never terminates normally; 0 rounds leaves X doubled word by word. */
+static void neoscrypt_salsa(uint *X, uint rounds) {
+    uint x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, t;
+
+    x0 = X[0];   x1 = X[1];   x2 = X[2];   x3 = X[3];
+    x4 = X[4];   x5 = X[5];   x6 = X[6];   x7 = X[7];
+    x8 = X[8];   x9 = X[9];  x10 = X[10]; x11 = X[11];
+   x12 = X[12]; x13 = X[13]; x14 = X[14]; x15 = X[15];
+
+/* One quarter round; uses the local scratch variable t */
+#define quarter(a, b, c, d) \
+    t = a + d; t = ROTL32(t,  7); b ^= t; \
+    t = b + a; t = ROTL32(t,  9); c ^= t; \
+    t = c + b; t = ROTL32(t, 13); d ^= t; \
+    t = d + c; t = ROTL32(t, 18); a ^= t;
+
+    for(; rounds; rounds -= 2) {
+        /* first four quarter rounds of the double round */
+        quarter( x0,  x4,  x8, x12);
+        quarter( x5,  x9, x13,  x1);
+        quarter(x10, x14,  x2,  x6);
+        quarter(x15,  x3,  x7, x11);
+        /* second four quarter rounds of the double round */
+        quarter( x0,  x1,  x2,  x3);
+        quarter( x5,  x6,  x7,  x4);
+        quarter(x10, x11,  x8,  x9);
+        quarter(x15, x12, x13, x14);
+    }
+
+    /* Feed-forward: add the permuted words onto the input block */
+    X[0] += x0;   X[1] += x1;   X[2] += x2;   X[3] += x3;
+    X[4] += x4;   X[5] += x5;   X[6] += x6;   X[7] += x7;
+    X[8] += x8;   X[9] += x9;  X[10] += x10; X[11] += x11;
+   X[12] += x12; X[13] += x13; X[14] += x14; X[15] += x15;
+
+#undef quarter
+}
+
+/* ChaCha20, rounds must be a multiple of 2
+ *
+ * Permutes the 16-word block X in place: the words are copied to locals,
+ * run through rounds / 2 double rounds, and the result is added back onto X.
+ * The loop counts down by 2 until it hits exactly 0, so an odd round count
+ * never terminates normally. */
+static void neoscrypt_chacha(uint *X, uint rounds) {
+    uint x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, t;
+
+    x0 = X[0];   x1 = X[1];   x2 = X[2];   x3 = X[3];
+    x4 = X[4];   x5 = X[5];   x6 = X[6];   x7 = X[7];
+    x8 = X[8];   x9 = X[9];  x10 = X[10]; x11 = X[11];
+   x12 = X[12]; x13 = X[13]; x14 = X[14]; x15 = X[15];
+
+/* One quarter round; uses the local scratch variable t */
+#define quarter(a,b,c,d) \
+    a += b; t = d ^ a; d = ROTL32(t, 16); \
+    c += d; t = b ^ c; b = ROTL32(t, 12); \
+    a += b; t = d ^ a; d = ROTL32(t,  8); \
+    c += d; t = b ^ c; b = ROTL32(t,  7);
+
+    for(; rounds; rounds -= 2) {
+        /* column round */
+        quarter( x0,  x4,  x8, x12);
+        quarter( x1,  x5,  x9, x13);
+        quarter( x2,  x6, x10, x14);
+        quarter( x3,  x7, x11, x15);
+        /* diagonal round */
+        quarter( x0,  x5, x10, x15);
+        quarter( x1,  x6, x11, x12);
+        quarter( x2,  x7,  x8, x13);
+        quarter( x3,  x4,  x9, x14);
+    }
+
+    /* Feed-forward: add the permuted words onto the input block */
+    X[0] += x0;   X[1] += x1;   X[2] += x2;   X[3] += x3;
+    X[4] += x4;   X[5] += x5;   X[6] += x6;   X[7] += x7;
+    X[8] += x8;   X[9] += x9;  X[10] += x10; X[11] += x11;
+   X[12] += x12; X[13] += x13; X[14] += x14; X[15] += x15;
+
+#undef quarter
+}
+
+
+/* Word-wise block copy, four machine words per iteration;
+ * len is in bytes and must be a multiple of 32 */
+static void neoscrypt_blkcpy(void *dstp, const void *srcp, uint len) {
+    const ulong *from = (const ulong *) srcp;
+    ulong *to = (ulong *) dstp;
+    const size_t words = len / sizeof(ulong);
+    size_t done;
+
+    for(done = 0; done < words; done += 4) {
+        to[0] = from[0];
+        to[1] = from[1];
+        to[2] = from[2];
+        to[3] = from[3];
+        from += 4;
+        to += 4;
+    }
+}
+
+/* Word-wise exchange of two blocks, four machine words per iteration;
+ * len is in bytes and must be a multiple of 32 */
+static void neoscrypt_blkswp(void *blkAp, void *blkBp, uint len) {
+    ulong *a = (ulong *) blkAp;
+    ulong *b = (ulong *) blkBp;
+    const size_t words = len / sizeof(ulong);
+    ulong saved[4];
+    size_t done;
+    uint k;
+
+    for(done = 0; done < words; done += 4) {
+        /* Save four words of A, overwrite them from B, then store the
+         * saved words into B */
+        for(k = 0; k < 4; k++)
+          saved[k] = a[k];
+        for(k = 0; k < 4; k++)
+          a[k] = b[k];
+        for(k = 0; k < 4; k++)
+          b[k] = saved[k];
+        a += 4;
+        b += 4;
+    }
+}
+
+/* Word-wise dst ^= src, four machine words per iteration;
+ * len is in bytes and must be a multiple of 32 */
+static void neoscrypt_blkxor(void *dstp, const void *srcp, uint len) {
+    const ulong *from = (const ulong *) srcp;
+    ulong *to = (ulong *) dstp;
+    const size_t words = len / sizeof(ulong);
+    size_t done;
+
+    for(done = 0; done < words; done += 4) {
+        to[0] ^= from[0];
+        to[1] ^= from[1];
+        to[2] ^= from[2];
+        to[3] ^= from[3];
+        from += 4;
+        to += 4;
+    }
+}
+
+#endif
+
+/* Copy len bytes from srcp to dstp: whole machine words first, then the
+ * remaining (len modulo word size) bytes one at a time */
+static void neoscrypt_copy(void *dstp, const void *srcp, uint len) {
+    ulong *wdst = (ulong *) dstp;
+    const ulong *wsrc = (const ulong *) srcp;
+    uchar *bdst = (uchar *) dstp;
+    const uchar *bsrc = (const uchar *) srcp;
+    const uint nwords = (uint) (len / sizeof(ulong));
+    uint pos;
+
+    for(pos = 0; pos < nwords; pos++)
+      wdst[pos] = wsrc[pos];
+
+    /* Byte tail, starting right after the last whole word */
+    for(pos = nwords * (uint) sizeof(ulong); pos < len; pos++)
+      bdst[pos] = bsrc[pos];
+}
+
+/* Zero len bytes at dstp: whole machine words first, then the remaining
+ * (len modulo word size) bytes one at a time */
+static void neoscrypt_erase(void *dstp, uint len) {
+    ulong *wdst = (ulong *) dstp;
+    uchar *bdst = (uchar *) dstp;
+    const uint nwords = (uint) (len / sizeof(ulong));
+    uint pos;
+
+    for(pos = 0; pos < nwords; pos++)
+      wdst[pos] = 0;
+
+    /* Byte tail, starting right after the last whole word */
+    for(pos = nwords * (uint) sizeof(ulong); pos < len; pos++)
+      bdst[pos] = 0;
+}
+
+/* dst ^= src over len bytes: whole machine words first, then the remaining
+ * (len modulo word size) bytes one at a time */
+static void neoscrypt_xor(void *dstp, const void *srcp, uint len) {
+    ulong *wdst = (ulong *) dstp;
+    const ulong *wsrc = (const ulong *) srcp;
+    uchar *bdst = (uchar *) dstp;
+    const uchar *bsrc = (const uchar *) srcp;
+    const uint nwords = (uint) (len / sizeof(ulong));
+    uint pos;
+
+    for(pos = 0; pos < nwords; pos++)
+      wdst[pos] ^= wsrc[pos];
+
+    /* Byte tail, starting right after the last whole word */
+    for(pos = nwords * (uint) sizeof(ulong); pos < len; pos++)
+      bdst[pos] ^= bsrc[pos];
+}
+
+
+/* BLAKE2s */
+
+/* Sizes in bytes: compression block, digest and key as used by FastKDF */
+#define BLAKE2S_BLOCK_SIZE    64U
+#define BLAKE2S_OUT_SIZE      32U
+#define BLAKE2S_KEY_SIZE      32U
+
+/* Parameter block of 32 bytes */
+/* neoscrypt_blake2s() zeroes it, sets the first four fields and XORs the
+ * whole block into the initial state; the remaining fields stay zero there. */
+typedef struct blake2s_param_t {
+    uchar digest_length;
+    uchar key_length;
+    uchar fanout;
+    uchar depth;
+    uint  leaf_length;
+    uchar node_offset[6];
+    uchar node_depth;
+    uchar inner_length;
+    uchar salt[8];
+    uchar personal[8];
+} blake2s_param;
+
+/* State block of 180 bytes */
+typedef struct blake2s_state_t {
+    uint  h[8];     /* chaining value; must stay first, code copies the state's leading 32 bytes */
+    uint  t[2];     /* byte counter; only t[0] is advanced in this file */
+    uint  f[2];     /* f[0] is set to all ones before the final compression */
+    uchar buf[2 * BLAKE2S_BLOCK_SIZE];  /* input buffer holding up to two blocks */
+    uint  buflen;   /* number of valid bytes in buf */
+} blake2s_state;
+
+/* Initial chaining value */
+static const uint blake2s_IV[8] = {
+    0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
+    0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19
+};
+
+/* Message word selection: row r is the order in which the 16 message words
+ * are consumed in round r */
+static const uint8_t blake2s_sigma[10][16] = {
+    {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 } ,
+    { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 } ,
+    { 11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4 } ,
+    {  7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8 } ,
+    {  9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13 } ,
+    {  2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9 } ,
+    { 12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11 } ,
+    { 13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10 } ,
+    {  6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5 } ,
+    { 10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13 , 0 } ,
+};
+
+/*
+ * BLAKE2s compression function: mixes the 64-byte block at buf into the
+ * chaining value S->h, using the counter S->t and the flags S->f as they are
+ * at the time of the call (the caller updates them beforehand).
+ */
+static void blake2s_compress(blake2s_state *S, const uint *buf) {
+    uint i;
+    uint m[16];
+    uint v[16];
+
+    /* m = message block, v[0..7] = current chaining value (first 32 bytes of S) */
+    neoscrypt_copy(m, buf, 64);
+    neoscrypt_copy(v, S, 32);
+
+    /* v[8..15] = IV, with counter and finalisation flags folded into the
+     * upper four words */
+    v[ 8] = blake2s_IV[0];
+    v[ 9] = blake2s_IV[1];
+    v[10] = blake2s_IV[2];
+    v[11] = blake2s_IV[3];
+    v[12] = S->t[0] ^ blake2s_IV[4];
+    v[13] = S->t[1] ^ blake2s_IV[5];
+    v[14] = S->f[0] ^ blake2s_IV[6];
+    v[15] = S->f[1] ^ blake2s_IV[7];
+/* Mixing function: round r, position i selects two message words via sigma */
+#define G(r,i,a,b,c,d) \
+  do { \
+    a = a + b + m[blake2s_sigma[r][2*i+0]]; \
+    d = ROTR32(d ^ a, 16); \
+    c = c + d; \
+    b = ROTR32(b ^ c, 12); \
+    a = a + b + m[blake2s_sigma[r][2*i+1]]; \
+    d = ROTR32(d ^ a, 8); \
+    c = c + d; \
+    b = ROTR32(b ^ c, 7); \
+  } while(0)
+/* One full round: four column mixes followed by four diagonal mixes */
+#define ROUND(r) \
+  do { \
+    G(r, 0, v[ 0], v[ 4], v[ 8], v[12]); \
+    G(r, 1, v[ 1], v[ 5], v[ 9], v[13]); \
+    G(r, 2, v[ 2], v[ 6], v[10], v[14]); \
+    G(r, 3, v[ 3], v[ 7], v[11], v[15]); \
+    G(r, 4, v[ 0], v[ 5], v[10], v[15]); \
+    G(r, 5, v[ 1], v[ 6], v[11], v[12]); \
+    G(r, 6, v[ 2], v[ 7], v[ 8], v[13]); \
+    G(r, 7, v[ 3], v[ 4], v[ 9], v[14]); \
+  } while(0)
+    ROUND(0);
+    ROUND(1);
+    ROUND(2);
+    ROUND(3);
+    ROUND(4);
+    ROUND(5);
+    ROUND(6);
+    ROUND(7);
+    ROUND(8);
+    ROUND(9);
+
+  /* Fold both halves of the work vector back into the chaining value */
+  for(i = 0; i < 8; i++)
+    S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
+
+#undef G
+#undef ROUND
+}
+
+/*
+ * Absorb input_size bytes into the BLAKE2s state.
+ * Data is staged in the two-block buffer S->buf; a block is compressed only
+ * when more input arrives than the buffer can still hold, so the most recent
+ * data always remains buffered for the caller's finalisation step.
+ * Only S->t[0] is advanced; no carry into S->t[1] is performed here.
+ */
+static void blake2s_update(blake2s_state *S, const uchar *input, uint input_size) {
+    uint left, fill;
+
+    while(input_size > 0) {
+        left = S->buflen;
+        /* Free space remaining in the two-block buffer */
+        fill = 2 * BLAKE2S_BLOCK_SIZE - left;
+        if(input_size > fill) {
+            /* Buffer fill */
+            neoscrypt_copy(S->buf + left, input, fill);
+            S->buflen += fill;
+            /* Counter increment */
+            S->t[0] += BLAKE2S_BLOCK_SIZE;
+            /* Compress */
+            blake2s_compress(S, (uint *) S->buf);
+            /* Shift buffer left */
+            neoscrypt_copy(S->buf, S->buf + BLAKE2S_BLOCK_SIZE, BLAKE2S_BLOCK_SIZE);
+            S->buflen -= BLAKE2S_BLOCK_SIZE;
+            input += fill;
+            input_size -= fill;
+        } else {
+            /* Everything fits: just append to the buffer */
+            neoscrypt_copy(S->buf + left, input, input_size);
+            S->buflen += input_size; 
+            /* Do not compress */
+            input += input_size;
+            input_size = 0;
+        }
+    }
+}
+
+/*
+ * One-shot keyed BLAKE2s.
+ *   input, input_size   message
+ *   key, key_size       key; it is copied into a zeroed 64-byte block, so
+ *                       key_size must not exceed BLAKE2S_BLOCK_SIZE
+ *   output, output_size destination; the first output_size bytes of the final
+ *                       state are copied out
+ */
+static void neoscrypt_blake2s(const void *input, const uint input_size, const void *key, const uchar key_size,
+  void *output, const uchar output_size) {
+    uchar block[BLAKE2S_BLOCK_SIZE];
+    blake2s_param P[1];
+    blake2s_state S[1];
+
+    /* Initialise */
+    neoscrypt_erase(P, 32);
+    P->digest_length = output_size;
+    P->key_length    = key_size;
+    P->fanout        = 1;
+    P->depth         = 1;
+
+    /* h = IV ^ parameter block; counters, flags and buffer start at zero */
+    neoscrypt_erase(S, 180);
+    neoscrypt_copy(S, blake2s_IV, 32);
+    neoscrypt_xor(S, P, 32);
+
+    /* The zero-padded key is absorbed as the first input block */
+    neoscrypt_erase(block, BLAKE2S_BLOCK_SIZE);
+    neoscrypt_copy(block, key, key_size);
+    blake2s_update(S, (uchar *) block, BLAKE2S_BLOCK_SIZE);
+
+    /* Update */
+    blake2s_update(S, (uchar *) input, input_size);
+
+    /* Finish */
+    if(S->buflen > BLAKE2S_BLOCK_SIZE) {
+        /* More than one block is buffered: compress the first one normally */
+        S->t[0] += BLAKE2S_BLOCK_SIZE;
+        blake2s_compress(S, (uint *) S->buf);
+        S->buflen -= BLAKE2S_BLOCK_SIZE;
+        neoscrypt_copy(S->buf, S->buf + BLAKE2S_BLOCK_SIZE, S->buflen);
+    }
+    /* Last block: count only the real bytes, raise the final-block flag and
+     * zero-pad the rest of the buffer */
+    S->t[0] += S->buflen;
+    S->f[0] = ~0U;
+    neoscrypt_erase(S->buf + S->buflen, 2 * BLAKE2S_BLOCK_SIZE - S->buflen);
+    blake2s_compress(S, (uint *) S->buf);
+
+    /* Write back */
+    neoscrypt_copy(output, S, output_size);
+}
+
+
+#define FASTKDF_BUFFER_SIZE 256U
+
+/* FastKDF, a fast buffered key derivation function:
+ * FASTKDF_BUFFER_SIZE must be a power of 2;
+ * password_len, salt_len and output_len should not exceed FASTKDF_BUFFER_SIZE
+ * (larger values are clamped); password_len and salt_len must be non-zero;
+ * prf_output_size must be <= prf_key_size;
+ *
+ *   password, password_len  repeated to fill the PRF input ring buffer A
+ *   salt, salt_len          repeated to fill the PRF key ring buffer B
+ *   N                       number of PRF (BLAKE2s) iterations
+ *   output, output_len      destination buffer and number of bytes to produce
+ *
+ * The work area is a fixed-size automatic array, so nothing is allocated on
+ * the heap and there is nothing to release or to fail on any path. */
+static void neoscrypt_fastkdf(const uchar *password, uint password_len, const uchar *salt, uint salt_len,
+  uint N, uchar *output, uint output_len) {
+    /* Work area size as a compile-time constant (valid in C89, no VLA):
+     * 2 * kdf_buf_size + prf_input_size + prf_key_size + prf_output_size + stack_align */
+    enum { FASTKDF_WORK_SIZE = 2 * 256 + 64 + 32 + 32 + 0x40 };
+    const uint stack_align = 0x40;
+    const uint kdf_buf_size = 256U; //FASTKDF_BUFFER_SIZE
+    const uint prf_input_size = 64U; //BLAKE2S_BLOCK_SIZE
+    const uint prf_key_size = 32U; //BLAKE2S_KEY_SIZE
+    const uint prf_output_size = 32U; //BLAKE2S_OUT_SIZE
+    uint bufptr, a, b, i, j;
+    uchar *A, *B, *prf_input, *prf_key, *prf_output;
+    uchar stack[FASTKDF_WORK_SIZE];
+
+    /* Set up the buffers in stack.
+     * Note: the index expression evaluates to stack_align itself, i.e. the
+     * buffers simply start stack_align bytes into the work area. */
+    A          = &stack[stack_align & ~(stack_align - 1)];
+    B          = &A[kdf_buf_size + prf_input_size];
+    prf_output = &A[2 * kdf_buf_size + prf_input_size + prf_key_size];
+
+    /* Initialise the password buffer */
+    if(password_len > kdf_buf_size)
+       password_len = kdf_buf_size;
+
+    a = kdf_buf_size / password_len;
+    for(i = 0; i < a; i++)
+      neoscrypt_copy(&A[i * password_len], &password[0], password_len);
+    b = kdf_buf_size - a * password_len;
+    if(b)
+      neoscrypt_copy(&A[a * password_len], &password[0], b);
+    /* The tail mirrors the head of the ring. Copying from A rather than from
+     * password yields the same bytes whenever password_len >= prf_input_size
+     * and never reads past the end of a shorter password. */
+    neoscrypt_copy(&A[kdf_buf_size], &A[0], prf_input_size);
+
+    /* Initialise the salt buffer */
+    if(salt_len > kdf_buf_size)
+       salt_len = kdf_buf_size;
+
+    a = kdf_buf_size / salt_len;
+    for(i = 0; i < a; i++)
+      neoscrypt_copy(&B[i * salt_len], &salt[0], salt_len);
+    b = kdf_buf_size - a * salt_len;
+    if(b)
+      neoscrypt_copy(&B[a * salt_len], &salt[0], b);
+    /* Same mirroring for the key ring (see above) */
+    neoscrypt_copy(&B[kdf_buf_size], &B[0], prf_key_size);
+
+    /* The primary iteration */
+    for(i = 0, bufptr = 0; i < N; i++) {
+
+        /* Map the PRF input buffer */
+        prf_input = &A[bufptr];
+
+        /* Map the PRF key buffer */
+        prf_key = &B[bufptr];
+
+        /* PRF */
+        neoscrypt_blake2s(prf_input, prf_input_size, prf_key, prf_key_size, prf_output, prf_output_size);
+
+        /* Calculate the next buffer pointer */
+        for(j = 0, bufptr = 0; j < prf_output_size; j++)
+          bufptr += prf_output[j];
+        bufptr &= (kdf_buf_size - 1);
+
+        /* Modify the salt buffer */
+        neoscrypt_xor(&B[bufptr], &prf_output[0], prf_output_size);
+
+        /* Head modified, tail updated */
+        if(bufptr < prf_key_size)
+          neoscrypt_copy(&B[kdf_buf_size + bufptr], &B[bufptr], MIN(prf_output_size, prf_key_size - bufptr));
+
+        /* Tail modified, head updated */
+        if((kdf_buf_size - bufptr) < prf_output_size)
+          neoscrypt_copy(&B[0], &B[kdf_buf_size], prf_output_size - (kdf_buf_size - bufptr));
+
+    }
+
+    /* Modify and copy into the output buffer */
+    if(output_len > kdf_buf_size)
+       output_len = kdf_buf_size;
+
+    a = kdf_buf_size - bufptr;
+    if(a >= output_len) {
+        neoscrypt_xor(&B[bufptr], &A[0], output_len);
+        neoscrypt_copy(&output[0], &B[bufptr], output_len);
+    } else {
+        /* The requested range wraps around the end of the ring */
+        neoscrypt_xor(&B[bufptr], &A[0], a);
+        neoscrypt_xor(&B[0], &A[a], output_len - a);
+        neoscrypt_copy(&output[0], &B[bufptr], a);
+        neoscrypt_copy(&output[a], &B[0], output_len - a);
+    }
+
+}
+
+
+/* Configurable optimised block mixer */
+/* Mixes the 2 * r blocks (16 words each) of X in place; Y is scratch space
+ * of the same size and is only touched when r is neither 1 nor 2.
+ * mixmode: high byte non-zero selects ChaCha, zero selects Salsa;
+ *          low byte is the round count passed to the selected core. */
+static void neoscrypt_blkmix(uint *X, uint *Y, uint r, uint mixmode) {
+    void (*mix)(uint *, uint) = (mixmode >> 8) ? neoscrypt_chacha : neoscrypt_salsa;
+    const uint rounds = mixmode & 0xFF;
+    const uint nblk = 2 * r;
+    uint cur, prev;
+
+    /* NeoScrypt flow:                   Scrypt flow:
+         Xa ^= Xd;  M(Xa'); Ya = Xa";      Xa ^= Xb;  M(Xa'); Ya = Xa";
+         Xb ^= Xa"; M(Xb'); Yb = Xb";      Xb ^= Xa"; M(Xb'); Yb = Xb";
+         Xc ^= Xb"; M(Xc'); Yc = Xc";      Xa" = Ya;
+         Xd ^= Xc"; M(Xd'); Yd = Xd";      Xb" = Yb;
+         Xa" = Ya; Xb" = Yc;
+         Xc" = Yb; Xd" = Yd; */
+
+    if((r == 1) || (r == 2)) {
+        /* In-place chain: each block is XORed with its cyclic predecessor
+         * and then mixed; no scratch space needed */
+        for(cur = 0; cur < nblk; cur++) {
+            prev = cur ? (cur - 1) : (nblk - 1);
+            neoscrypt_blkxor(&X[16 * cur], &X[16 * prev], SCRYPT_BLOCK_SIZE);
+            mix(&X[16 * cur], rounds);
+        }
+        /* For r == 2 the output order is a, c, b, d: swap the middle pair */
+        if(r == 2)
+          neoscrypt_blkswp(&X[16], &X[32], SCRYPT_BLOCK_SIZE);
+        return;
+    }
+
+    /* Reference code for any reasonable r */
+    for(cur = 0; cur < nblk; cur++) {
+        prev = cur ? (cur - 1) : (nblk - 1);
+        neoscrypt_blkxor(&X[16 * cur], &X[16 * prev], SCRYPT_BLOCK_SIZE);
+        mix(&X[16 * cur], rounds);
+        neoscrypt_blkcpy(&Y[16 * cur], &X[16 * cur], SCRYPT_BLOCK_SIZE);
+    }
+    /* De-interleave: even-numbered results first, odd-numbered ones after */
+    for(cur = 0; cur < r; cur++)
+      neoscrypt_blkcpy(&X[16 * cur], &Y[16 * 2 * cur], SCRYPT_BLOCK_SIZE);
+    for(cur = 0; cur < r; cur++)
+      neoscrypt_blkcpy(&X[16 * (cur + r)], &Y[16 * (2 * cur + 1)], SCRYPT_BLOCK_SIZE);
+}
+
+/* NeoScrypt core engine:
+ * p = 1, salt = password;
+ * Basic customisation (required):
+ *   profile bit 0:
+ *     0 = NeoScrypt(128, 2, 1) with Salsa20/20 and ChaCha20/20;
+ *     1 = Scrypt(1024, 1, 1) with Salsa20/8;
+ *   profile bits 4 to 1:
+ *     0000 = FastKDF-BLAKE2s;
+ *     0001 = PBKDF2-HMAC-SHA256;
+ * Extended customisation (optional):
+ *   profile bit 31:
+ *     0 = extended customisation absent;
+ *     1 = extended customisation present;
+ *   profile bits 7 to 5 (rfactor):
+ *     000 = r of 1;
+ *     001 = r of 2;
+ *     010 = r of 4;
+ *     ...
+ *     111 = r of 128;
+ *   profile bits 12 to 8 (Nfactor):
+ *     00000 = N of 2;
+ *     00001 = N of 4;
+ *     00010 = N of 8;
+ *     .....
+ *     00110 = N of 128;
+ *     .....
+ *     01001 = N of 1024;
+ *     .....
+ *     11110 = N of 2147483648;
+ *   profile bits 30 to 13 are reserved
+ *
+ * password: 80 bytes of input (also used as the salt);
+ * output:   receives 32 bytes;
+ * The scratch area is allocated on the heap and released before returning.
+ * If the allocation fails the function returns without writing to output. */
+void neoscrypt(const uchar *password, uchar *output, uint profile) {
+    uint N = 128, r = 2, dblmix = 1, mixmode = 0x14, stack_align = 0x40;
+    uint kdf, i, j;
+    uint *X, *Y, *Z, *V;
+    uchar *stack;
+    size_t stack_size;
+
+    if(profile & 0x1) {
+        N = 1024;        /* N = (1 << (Nfactor + 1)); */
+        r = 1;           /* r = (1 << rfactor); */
+        dblmix = 0;      /* Salsa only */
+        mixmode = 0x08;  /* 8 rounds */
+    }
+
+    if(profile >> 31) {
+        /* Unsigned shifts: N of 2147483648 does not fit a signed int */
+        N = (1U << (((profile >> 8) & 0x1F) + 1));
+        r = (1U << ((profile >> 5) & 0x7));
+    }
+
+    /* X, Z and Y take r * 2 * SCRYPT_BLOCK_SIZE bytes each, V takes N times
+     * that; the size is computed in size_t to avoid 32-bit wrap-around */
+    stack_size = ((size_t)N + 3) * r * 2 * SCRYPT_BLOCK_SIZE + stack_align;
+    stack = (uchar *) malloc(stack_size);
+    if(!stack)
+      return;
+
+    /* X = r * 2 * SCRYPT_BLOCK_SIZE */
+    X = (uint *) &stack[stack_align & ~(stack_align - 1)];
+    /* Z is a copy of X for ChaCha */
+    Z = &X[32 * r];
+    /* Y is an X sized temporal space */
+    Y = &X[64 * r];
+    /* V = N * r * 2 * SCRYPT_BLOCK_SIZE */
+    V = &X[96 * r];
+
+    /* X = KDF(password, salt) */
+    kdf = (profile >> 1) & 0xF;
+
+    switch(kdf) {
+
+        default:
+        case(0x0):
+            neoscrypt_fastkdf(password, 80, password, 80, 32, (uchar *) X, r * 2 * SCRYPT_BLOCK_SIZE);
+            break;
+
+        case(0x1):
+            neoscrypt_pbkdf2_sha256(password, 80, password, 80, 1, (uchar *) X, r * 2 * SCRYPT_BLOCK_SIZE);
+            break;
+
+    }
+
+    /* Process ChaCha 1st, Salsa 2nd and XOR them into FastKDF; otherwise Salsa only */
+
+    if(dblmix) {
+        /* blkcpy(Z, X) */
+        neoscrypt_blkcpy(&Z[0], &X[0], r * 2 * SCRYPT_BLOCK_SIZE);
+
+        /* Z = SMix(Z) */
+        for(i = 0; i < N; i++) {
+            /* blkcpy(V, Z) */
+            neoscrypt_blkcpy(&V[i * (32 * r)], &Z[0], r * 2 * SCRYPT_BLOCK_SIZE);
+            /* blkmix(Z, Y) */
+            neoscrypt_blkmix(&Z[0], &Y[0], r, (mixmode | 0x0100));
+        }
+        for(i = 0; i < N; i++) {
+            /* integerify(Z) mod N */
+            j = (32 * r) * (Z[16 * (2 * r - 1)] & (N - 1));
+            /* blkxor(Z, V) */
+            neoscrypt_blkxor(&Z[0], &V[j], r * 2 * SCRYPT_BLOCK_SIZE);
+            /* blkmix(Z, Y) */
+            neoscrypt_blkmix(&Z[0], &Y[0], r, (mixmode | 0x0100));
+        }
+    }
+
+#if (ASM)
+    /* Must be called before and after SSE2 Salsa */
+    neoscrypt_salsa_tangle(&X[0], r * 2);
+#endif
+
+    /* X = SMix(X) */
+    for(i = 0; i < N; i++) {
+        /* blkcpy(V, X) */
+        neoscrypt_blkcpy(&V[i * (32 * r)], &X[0], r * 2 * SCRYPT_BLOCK_SIZE);
+        /* blkmix(X, Y) */
+        neoscrypt_blkmix(&X[0], &Y[0], r, mixmode);
+    }
+    for(i = 0; i < N; i++) {
+        /* integerify(X) mod N */
+        j = (32 * r) * (X[16 * (2 * r - 1)] & (N - 1));
+        /* blkxor(X, V) */
+        neoscrypt_blkxor(&X[0], &V[j], r * 2 * SCRYPT_BLOCK_SIZE);
+        /* blkmix(X, Y) */
+        neoscrypt_blkmix(&X[0], &Y[0], r, mixmode);
+    }
+
+#if (ASM)
+    neoscrypt_salsa_tangle(&X[0], r * 2);
+#endif
+
+    if(dblmix)
+      /* blkxor(X, Z) */
+      neoscrypt_blkxor(&X[0], &Z[0], r * 2 * SCRYPT_BLOCK_SIZE);
+
+    /* output = KDF(password, X) */
+    switch(kdf) {
+
+        default:
+        case(0x0):
+            neoscrypt_fastkdf(password, 80, (uchar *) X, r * 2 * SCRYPT_BLOCK_SIZE, 32, output, 32);
+            break;
+
+        case(0x1):
+            neoscrypt_pbkdf2_sha256(password, 80, (uchar *) X, r * 2 * SCRYPT_BLOCK_SIZE, 1, output, 32);
+            break;
+
+    }
+
+    /* Release the scratch area allocated above */
+    free(stack);
+}
+
diff --git a/sph/neoscrypt.h b/sph/neoscrypt.h
new file mode 100644
index 0000000000..5c4d4e410a
--- /dev/null
+++ b/sph/neoscrypt.h
@@ -0,0 +1,33 @@
+#if (__cplusplus)
+extern "C" {
+#endif
+
+void neoscrypt(const unsigned char *input, unsigned char *output, unsigned int profile);
+
+#if (__cplusplus)
+}
+#else
+
+#define SCRYPT_BLOCK_SIZE 64
+#define SCRYPT_HASH_BLOCK_SIZE 64
+#define SCRYPT_HASH_DIGEST_SIZE 32
+
+typedef uint8_t hash_digest[SCRYPT_HASH_DIGEST_SIZE];
+
+#define ROTL32(a,b) (((a) << (b)) | ((a) >> (32 - b)))
+#define ROTR32(a,b) (((a) >> (b)) | ((a) << (32 - b)))
+
+#define U8TO32_BE(p) \
+    (((uint32_t)((p)[0]) << 24) | ((uint32_t)((p)[1]) << 16) | \
+    ((uint32_t)((p)[2]) <<  8) | ((uint32_t)((p)[3])))
+
+#define U32TO8_BE(p, v) \
+    (p)[0] = (uint8_t)((v) >> 24); (p)[1] = (uint8_t)((v) >> 16); \
+    (p)[2] = (uint8_t)((v) >>  8); (p)[3] = (uint8_t)((v)      );
+
+#define U64TO8_BE(p, v) \
+    U32TO8_BE((p),     (uint32_t)((v) >> 32)); \
+    U32TO8_BE((p) + 4, (uint32_t)((v)      ));
+
+#endif
+
diff --git a/sph/ripemd.c b/sph/ripemd.c
new file mode 100644
index 0000000000..e242ac254b
--- /dev/null
+++ b/sph/ripemd.c
@@ -0,0 +1,833 @@
+/* $Id: ripemd.c 216 2010-06-08 09:46:57Z tp $ */
+/*
+ * RIPEMD-160 implementation.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#include <stddef.h>
+#include <string.h>
+
+#include "sph_ripemd.h"
+
+/*
+ * Round functions for RIPEMD (original): F = x ? y : z, G = majority, H = xor (all bitwise).
+ */
+#define F(x, y, z)    ((((y) ^ (z)) & (x)) ^ (z))
+#define G(x, y, z)    (((x) & (y)) | (((x) | (y)) & (z)))
+#define H(x, y, z)    ((x) ^ (y) ^ (z))
+/* Initial state for RIPEMD (original); four words are listed, so the fifth element is zero. */
+static const sph_u32 oIV[5] = {
+	SPH_C32(0x67452301), SPH_C32(0xEFCDAB89),
+	SPH_C32(0x98BADCFE), SPH_C32(0x10325476)
+};
+
+/*
+ * Round functions for RIPEMD-128 (F1..F4) and RIPEMD-160 (F1..F5).
+ */
+#define F1(x, y, z)   ((x) ^ (y) ^ (z))
+#define F2(x, y, z)   ((((y) ^ (z)) & (x)) ^ (z))
+#define F3(x, y, z)   (((x) | ~(y)) ^ (z))
+#define F4(x, y, z)   ((((x) ^ (y)) & (z)) ^ (y))
+#define F5(x, y, z)   ((x) ^ ((y) | ~(z)))
+/* Initial state used by both sph_ripemd128_init() and sph_ripemd160_init(). */
+static const sph_u32 IV[5] = {
+	SPH_C32(0x67452301), SPH_C32(0xEFCDAB89), SPH_C32(0x98BADCFE),
+	SPH_C32(0x10325476), SPH_C32(0xC3D2E1F0)
+};
+/* Short alias for the rotation used by every step macro in this file. */
+#define ROTL    SPH_ROTL32
+
+/* ===================================================================== */
+/*
+ * RIPEMD (original hash, deprecated).
+ */
+/* Step macros: A = ROTL(A + f(B, C, D) + X [+ constant], s). FF1 uses F and no constant. */
+#define FF1(A, B, C, D, X, s)   do { \
+		sph_u32 tmp = SPH_T32((A) + F(B, C, D) + (X)); \
+		(A) = ROTL(tmp, (s)); \
+	} while (0)
+/* GG1: G with constant 0x5A827999. */
+#define GG1(A, B, C, D, X, s)   do { \
+		sph_u32 tmp = SPH_T32((A) + G(B, C, D) \
+			+ (X) + SPH_C32(0x5A827999)); \
+		(A) = ROTL(tmp, (s)); \
+	} while (0)
+/* HH1: H with constant 0x6ED9EBA1. */
+#define HH1(A, B, C, D, X, s)   do { \
+		sph_u32 tmp = SPH_T32((A) + H(B, C, D) \
+			+ (X) + SPH_C32(0x6ED9EBA1)); \
+		(A) = ROTL(tmp, (s)); \
+	} while (0)
+/* FF2: F with constant 0x50A28BE6. */
+#define FF2(A, B, C, D, X, s)   do { \
+		sph_u32 tmp = SPH_T32((A) + F(B, C, D) \
+			+ (X) + SPH_C32(0x50A28BE6)); \
+		(A) = ROTL(tmp, (s)); \
+	} while (0)
+/* GG2: G with no constant. */
+#define GG2(A, B, C, D, X, s)   do { \
+		sph_u32 tmp = SPH_T32((A) + G(B, C, D) + (X)); \
+		(A) = ROTL(tmp, (s)); \
+	} while (0)
+/* HH2: H with constant 0x5C4DD124. */
+#define HH2(A, B, C, D, X, s)   do { \
+		sph_u32 tmp = SPH_T32((A) + H(B, C, D) \
+			+ (X) + SPH_C32(0x5C4DD124)); \
+		(A) = ROTL(tmp, (s)); \
+	} while (0)
+
+#define RIPEMD_ROUND_BODY(in, h)   do { \
+		sph_u32 A1, B1, C1, D1; \
+		sph_u32 A2, B2, C2, D2; \
+		sph_u32 tmp; \
+		/* Original RIPEMD compression: two lines of 48 steps, both started from h[0..3]. */ \
+		A1 = A2 = (h)[0]; \
+		B1 = B2 = (h)[1]; \
+		C1 = C2 = (h)[2]; \
+		D1 = D2 = (h)[3]; \
+		/* Line 1 (A1..D1): 16 FF1 steps over in(0)..in(15). */ \
+		FF1(A1, B1, C1, D1, in( 0), 11); \
+		FF1(D1, A1, B1, C1, in( 1), 14); \
+		FF1(C1, D1, A1, B1, in( 2), 15); \
+		FF1(B1, C1, D1, A1, in( 3), 12); \
+		FF1(A1, B1, C1, D1, in( 4),  5); \
+		FF1(D1, A1, B1, C1, in( 5),  8); \
+		FF1(C1, D1, A1, B1, in( 6),  7); \
+		FF1(B1, C1, D1, A1, in( 7),  9); \
+		FF1(A1, B1, C1, D1, in( 8), 11); \
+		FF1(D1, A1, B1, C1, in( 9), 13); \
+		FF1(C1, D1, A1, B1, in(10), 14); \
+		FF1(B1, C1, D1, A1, in(11), 15); \
+		FF1(A1, B1, C1, D1, in(12),  6); \
+		FF1(D1, A1, B1, C1, in(13),  7); \
+		FF1(C1, D1, A1, B1, in(14),  9); \
+		FF1(B1, C1, D1, A1, in(15),  8); \
+		/* Line 1: 16 GG1 steps, message words in permuted order. */ \
+		GG1(A1, B1, C1, D1, in( 7),  7); \
+		GG1(D1, A1, B1, C1, in( 4),  6); \
+		GG1(C1, D1, A1, B1, in(13),  8); \
+		GG1(B1, C1, D1, A1, in( 1), 13); \
+		GG1(A1, B1, C1, D1, in(10), 11); \
+		GG1(D1, A1, B1, C1, in( 6),  9); \
+		GG1(C1, D1, A1, B1, in(15),  7); \
+		GG1(B1, C1, D1, A1, in( 3), 15); \
+		GG1(A1, B1, C1, D1, in(12),  7); \
+		GG1(D1, A1, B1, C1, in( 0), 12); \
+		GG1(C1, D1, A1, B1, in( 9), 15); \
+		GG1(B1, C1, D1, A1, in( 5),  9); \
+		GG1(A1, B1, C1, D1, in(14),  7); \
+		GG1(D1, A1, B1, C1, in( 2), 11); \
+		GG1(C1, D1, A1, B1, in(11), 13); \
+		GG1(B1, C1, D1, A1, in( 8), 12); \
+		/* Line 1: 16 HH1 steps. */ \
+		HH1(A1, B1, C1, D1, in( 3), 11); \
+		HH1(D1, A1, B1, C1, in(10), 13); \
+		HH1(C1, D1, A1, B1, in( 2), 14); \
+		HH1(B1, C1, D1, A1, in( 4),  7); \
+		HH1(A1, B1, C1, D1, in( 9), 14); \
+		HH1(D1, A1, B1, C1, in(15),  9); \
+		HH1(C1, D1, A1, B1, in( 8), 13); \
+		HH1(B1, C1, D1, A1, in( 1), 15); \
+		HH1(A1, B1, C1, D1, in(14),  6); \
+		HH1(D1, A1, B1, C1, in( 7),  8); \
+		HH1(C1, D1, A1, B1, in( 0), 13); \
+		HH1(B1, C1, D1, A1, in( 6),  6); \
+		HH1(A1, B1, C1, D1, in(11), 12); \
+		HH1(D1, A1, B1, C1, in(13),  5); \
+		HH1(C1, D1, A1, B1, in( 5),  7); \
+		HH1(B1, C1, D1, A1, in(12),  5); \
+		/* Line 2 (A2..D2): same word order and shifts as line 1, using FF2. */ \
+		FF2(A2, B2, C2, D2, in( 0), 11); \
+		FF2(D2, A2, B2, C2, in( 1), 14); \
+		FF2(C2, D2, A2, B2, in( 2), 15); \
+		FF2(B2, C2, D2, A2, in( 3), 12); \
+		FF2(A2, B2, C2, D2, in( 4),  5); \
+		FF2(D2, A2, B2, C2, in( 5),  8); \
+		FF2(C2, D2, A2, B2, in( 6),  7); \
+		FF2(B2, C2, D2, A2, in( 7),  9); \
+		FF2(A2, B2, C2, D2, in( 8), 11); \
+		FF2(D2, A2, B2, C2, in( 9), 13); \
+		FF2(C2, D2, A2, B2, in(10), 14); \
+		FF2(B2, C2, D2, A2, in(11), 15); \
+		FF2(A2, B2, C2, D2, in(12),  6); \
+		FF2(D2, A2, B2, C2, in(13),  7); \
+		FF2(C2, D2, A2, B2, in(14),  9); \
+		FF2(B2, C2, D2, A2, in(15),  8); \
+		/* Line 2: 16 GG2 steps. */ \
+		GG2(A2, B2, C2, D2, in( 7),  7); \
+		GG2(D2, A2, B2, C2, in( 4),  6); \
+		GG2(C2, D2, A2, B2, in(13),  8); \
+		GG2(B2, C2, D2, A2, in( 1), 13); \
+		GG2(A2, B2, C2, D2, in(10), 11); \
+		GG2(D2, A2, B2, C2, in( 6),  9); \
+		GG2(C2, D2, A2, B2, in(15),  7); \
+		GG2(B2, C2, D2, A2, in( 3), 15); \
+		GG2(A2, B2, C2, D2, in(12),  7); \
+		GG2(D2, A2, B2, C2, in( 0), 12); \
+		GG2(C2, D2, A2, B2, in( 9), 15); \
+		GG2(B2, C2, D2, A2, in( 5),  9); \
+		GG2(A2, B2, C2, D2, in(14),  7); \
+		GG2(D2, A2, B2, C2, in( 2), 11); \
+		GG2(C2, D2, A2, B2, in(11), 13); \
+		GG2(B2, C2, D2, A2, in( 8), 12); \
+		/* Line 2: 16 HH2 steps. */ \
+		HH2(A2, B2, C2, D2, in( 3), 11); \
+		HH2(D2, A2, B2, C2, in(10), 13); \
+		HH2(C2, D2, A2, B2, in( 2), 14); \
+		HH2(B2, C2, D2, A2, in( 4),  7); \
+		HH2(A2, B2, C2, D2, in( 9), 14); \
+		HH2(D2, A2, B2, C2, in(15),  9); \
+		HH2(C2, D2, A2, B2, in( 8), 13); \
+		HH2(B2, C2, D2, A2, in( 1), 15); \
+		HH2(A2, B2, C2, D2, in(14),  6); \
+		HH2(D2, A2, B2, C2, in( 7),  8); \
+		HH2(C2, D2, A2, B2, in( 0), 13); \
+		HH2(B2, C2, D2, A2, in( 6),  6); \
+		HH2(A2, B2, C2, D2, in(11), 12); \
+		HH2(D2, A2, B2, C2, in(13),  5); \
+		HH2(C2, D2, A2, B2, in( 5),  7); \
+		HH2(B2, C2, D2, A2, in(12),  5); \
+		/* Fold both lines into h; tmp holds the new h[0] until the old h[0] has been read. */ \
+		tmp = SPH_T32((h)[1] + C1 + D2); \
+		(h)[1] = SPH_T32((h)[2] + D1 + A2); \
+		(h)[2] = SPH_T32((h)[3] + A1 + B2); \
+		(h)[3] = SPH_T32((h)[0] + B1 + C2); \
+		(h)[0] = tmp; \
+	} while (0)
+
+/*
+ * Apply the original RIPEMD compression function to one 64-byte block.
+ * data must be aligned for 32-bit access; only r[0..3] are read and updated.
+ */
+static void
+ripemd_round(const unsigned char *data, sph_u32 r[5])
+{
+#if SPH_LITTLE_FAST
+	/* Decode each message word straight from data at the point of use. */
+#define RIPEMD_IN(x)   sph_dec32le_aligned(data + (4 * (x)))
+#else
+	/* Decode the 16 message words once, up front. */
+	sph_u32 words[16];
+	const unsigned char *src = data;
+	int n;
+
+	for (n = 0; n < 16; n ++, src += 4)
+		words[n] = sph_dec32le_aligned(src);
+#define RIPEMD_IN(x)   words[x]
+#endif
+	RIPEMD_ROUND_BODY(RIPEMD_IN, r);
+#undef RIPEMD_IN
+}
+
+/* see sph_ripemd.h */
+void
+sph_ripemd_init(void *cc)
+{
+	sph_ripemd_context *ctx = cc;
+	/* Load the original-RIPEMD initial state, then clear the count field(s). */
+	memcpy(ctx->val, oIV, sizeof ctx->val);
+#if SPH_64
+	ctx->count = 0;
+#else
+	ctx->count_low = 0;
+	ctx->count_high = 0;
+#endif
+}
+
+#define RFUN   ripemd_round
+#define HASH   ripemd
+#define LE32   1
+#include "md_helper.c"
+#undef RFUN
+#undef HASH
+#undef LE32
+
+/* see sph_ripemd.h */
+void
+sph_ripemd_close(void *cc, void *dst)
+{
+	ripemd_close(cc, dst, 4);	/* presumably generated by the md_helper.c include; 4 looks like the output word count -- TODO confirm */
+	sph_ripemd_init(cc);	/* put the context back into its initial state */
+}
+
+/* see sph_ripemd.h: one original-RIPEMD compression of val[0..3] with the 16 words of msg used as-is */
+void
+sph_ripemd_comp(const sph_u32 msg[16], sph_u32 val[4])
+{
+#define RIPEMD_IN(x)   msg[x]
+	RIPEMD_ROUND_BODY(RIPEMD_IN, val);
+#undef RIPEMD_IN
+}
+
+/* ===================================================================== */
+/*
+ * RIPEMD-128.
+ */
+
+/*
+ * Round constants for RIPEMD-128: sK1n is used on line 1, sK2n on line 2 (n = round).
+ */
+#define sK11   SPH_C32(0x00000000)
+#define sK12   SPH_C32(0x5A827999)
+#define sK13   SPH_C32(0x6ED9EBA1)
+#define sK14   SPH_C32(0x8F1BBCDC)
+
+#define sK21   SPH_C32(0x50A28BE6)
+#define sK22   SPH_C32(0x5C4DD124)
+#define sK23   SPH_C32(0x6D703EF3)
+#define sK24   SPH_C32(0x00000000)
+/* One step: add f(b, c, d), message word r and constant k to a, then rotate left by s. */
+#define sRR(a, b, c, d, f, s, r, k)   do { \
+		a = ROTL(SPH_T32(a + f(b, c, d) + r + k), s); \
+	} while (0)
+/* Line-1 step: appends 1 to the register names and selects constant sK1<k>. */
+#define sROUND1(a, b, c, d, f, s, r, k)  \
+	sRR(a ## 1, b ## 1, c ## 1, d ## 1, f, s, r, sK1 ## k)
+/* Line-2 step: appends 2 to the register names and selects constant sK2<k>. */
+#define sROUND2(a, b, c, d, f, s, r, k)  \
+	sRR(a ## 2, b ## 2, c ## 2, d ## 2, f, s, r, sK2 ## k)
+
+/*
+ * This macro defines the body for a RIPEMD-128 compression function
+ * implementation. The "in" parameter should evaluate, when applied to a
+ * numerical input parameter from 0 to 15, to an expression which yields
+ * the corresponding input block. The "h" parameter should evaluate to
+ * an array or pointer expression designating the array of 4 words which
+ * contains the input and output of the compression function.
+ */
+
+#define RIPEMD128_ROUND_BODY(in, h)   do { \
+		sph_u32 A1, B1, C1, D1; \
+		sph_u32 A2, B2, C2, D2; \
+		sph_u32 tmp; \
+		/* Both lines start from the same chaining value. */ \
+		A1 = A2 = (h)[0]; \
+		B1 = B2 = (h)[1]; \
+		C1 = C2 = (h)[2]; \
+		D1 = D2 = (h)[3]; \
+		/* Line 1, round 1: F1 with constant sK11. */ \
+		sROUND1(A, B, C, D, F1, 11, in( 0),  1); \
+		sROUND1(D, A, B, C, F1, 14, in( 1),  1); \
+		sROUND1(C, D, A, B, F1, 15, in( 2),  1); \
+		sROUND1(B, C, D, A, F1, 12, in( 3),  1); \
+		sROUND1(A, B, C, D, F1,  5, in( 4),  1); \
+		sROUND1(D, A, B, C, F1,  8, in( 5),  1); \
+		sROUND1(C, D, A, B, F1,  7, in( 6),  1); \
+		sROUND1(B, C, D, A, F1,  9, in( 7),  1); \
+		sROUND1(A, B, C, D, F1, 11, in( 8),  1); \
+		sROUND1(D, A, B, C, F1, 13, in( 9),  1); \
+		sROUND1(C, D, A, B, F1, 14, in(10),  1); \
+		sROUND1(B, C, D, A, F1, 15, in(11),  1); \
+		sROUND1(A, B, C, D, F1,  6, in(12),  1); \
+		sROUND1(D, A, B, C, F1,  7, in(13),  1); \
+		sROUND1(C, D, A, B, F1,  9, in(14),  1); \
+		sROUND1(B, C, D, A, F1,  8, in(15),  1); \
+		/* Line 1, round 2: F2 with constant sK12. */ \
+		sROUND1(A, B, C, D, F2,  7, in( 7),  2); \
+		sROUND1(D, A, B, C, F2,  6, in( 4),  2); \
+		sROUND1(C, D, A, B, F2,  8, in(13),  2); \
+		sROUND1(B, C, D, A, F2, 13, in( 1),  2); \
+		sROUND1(A, B, C, D, F2, 11, in(10),  2); \
+		sROUND1(D, A, B, C, F2,  9, in( 6),  2); \
+		sROUND1(C, D, A, B, F2,  7, in(15),  2); \
+		sROUND1(B, C, D, A, F2, 15, in( 3),  2); \
+		sROUND1(A, B, C, D, F2,  7, in(12),  2); \
+		sROUND1(D, A, B, C, F2, 12, in( 0),  2); \
+		sROUND1(C, D, A, B, F2, 15, in( 9),  2); \
+		sROUND1(B, C, D, A, F2,  9, in( 5),  2); \
+		sROUND1(A, B, C, D, F2, 11, in( 2),  2); \
+		sROUND1(D, A, B, C, F2,  7, in(14),  2); \
+		sROUND1(C, D, A, B, F2, 13, in(11),  2); \
+		sROUND1(B, C, D, A, F2, 12, in( 8),  2); \
+		/* Line 1, round 3: F3 with constant sK13. */ \
+		sROUND1(A, B, C, D, F3, 11, in( 3),  3); \
+		sROUND1(D, A, B, C, F3, 13, in(10),  3); \
+		sROUND1(C, D, A, B, F3,  6, in(14),  3); \
+		sROUND1(B, C, D, A, F3,  7, in( 4),  3); \
+		sROUND1(A, B, C, D, F3, 14, in( 9),  3); \
+		sROUND1(D, A, B, C, F3,  9, in(15),  3); \
+		sROUND1(C, D, A, B, F3, 13, in( 8),  3); \
+		sROUND1(B, C, D, A, F3, 15, in( 1),  3); \
+		sROUND1(A, B, C, D, F3, 14, in( 2),  3); \
+		sROUND1(D, A, B, C, F3,  8, in( 7),  3); \
+		sROUND1(C, D, A, B, F3, 13, in( 0),  3); \
+		sROUND1(B, C, D, A, F3,  6, in( 6),  3); \
+		sROUND1(A, B, C, D, F3,  5, in(13),  3); \
+		sROUND1(D, A, B, C, F3, 12, in(11),  3); \
+		sROUND1(C, D, A, B, F3,  7, in( 5),  3); \
+		sROUND1(B, C, D, A, F3,  5, in(12),  3); \
+		/* Line 1, round 4: F4 with constant sK14. */ \
+		sROUND1(A, B, C, D, F4, 11, in( 1),  4); \
+		sROUND1(D, A, B, C, F4, 12, in( 9),  4); \
+		sROUND1(C, D, A, B, F4, 14, in(11),  4); \
+		sROUND1(B, C, D, A, F4, 15, in(10),  4); \
+		sROUND1(A, B, C, D, F4, 14, in( 0),  4); \
+		sROUND1(D, A, B, C, F4, 15, in( 8),  4); \
+		sROUND1(C, D, A, B, F4,  9, in(12),  4); \
+		sROUND1(B, C, D, A, F4,  8, in( 4),  4); \
+		sROUND1(A, B, C, D, F4,  9, in(13),  4); \
+		sROUND1(D, A, B, C, F4, 14, in( 3),  4); \
+		sROUND1(C, D, A, B, F4,  5, in( 7),  4); \
+		sROUND1(B, C, D, A, F4,  6, in(15),  4); \
+		sROUND1(A, B, C, D, F4,  8, in(14),  4); \
+		sROUND1(D, A, B, C, F4,  6, in( 5),  4); \
+		sROUND1(C, D, A, B, F4,  5, in( 6),  4); \
+		sROUND1(B, C, D, A, F4, 12, in( 2),  4); \
+		/* Line 2, round 1: F4 with constant sK21 (functions run in reverse order on this line). */ \
+		sROUND2(A, B, C, D, F4,  8, in( 5),  1); \
+		sROUND2(D, A, B, C, F4,  9, in(14),  1); \
+		sROUND2(C, D, A, B, F4,  9, in( 7),  1); \
+		sROUND2(B, C, D, A, F4, 11, in( 0),  1); \
+		sROUND2(A, B, C, D, F4, 13, in( 9),  1); \
+		sROUND2(D, A, B, C, F4, 15, in( 2),  1); \
+		sROUND2(C, D, A, B, F4, 15, in(11),  1); \
+		sROUND2(B, C, D, A, F4,  5, in( 4),  1); \
+		sROUND2(A, B, C, D, F4,  7, in(13),  1); \
+		sROUND2(D, A, B, C, F4,  7, in( 6),  1); \
+		sROUND2(C, D, A, B, F4,  8, in(15),  1); \
+		sROUND2(B, C, D, A, F4, 11, in( 8),  1); \
+		sROUND2(A, B, C, D, F4, 14, in( 1),  1); \
+		sROUND2(D, A, B, C, F4, 14, in(10),  1); \
+		sROUND2(C, D, A, B, F4, 12, in( 3),  1); \
+		sROUND2(B, C, D, A, F4,  6, in(12),  1); \
+		/* Line 2, round 2: F3 with constant sK22. */ \
+		sROUND2(A, B, C, D, F3,  9, in( 6),  2); \
+		sROUND2(D, A, B, C, F3, 13, in(11),  2); \
+		sROUND2(C, D, A, B, F3, 15, in( 3),  2); \
+		sROUND2(B, C, D, A, F3,  7, in( 7),  2); \
+		sROUND2(A, B, C, D, F3, 12, in( 0),  2); \
+		sROUND2(D, A, B, C, F3,  8, in(13),  2); \
+		sROUND2(C, D, A, B, F3,  9, in( 5),  2); \
+		sROUND2(B, C, D, A, F3, 11, in(10),  2); \
+		sROUND2(A, B, C, D, F3,  7, in(14),  2); \
+		sROUND2(D, A, B, C, F3,  7, in(15),  2); \
+		sROUND2(C, D, A, B, F3, 12, in( 8),  2); \
+		sROUND2(B, C, D, A, F3,  7, in(12),  2); \
+		sROUND2(A, B, C, D, F3,  6, in( 4),  2); \
+		sROUND2(D, A, B, C, F3, 15, in( 9),  2); \
+		sROUND2(C, D, A, B, F3, 13, in( 1),  2); \
+		sROUND2(B, C, D, A, F3, 11, in( 2),  2); \
+		/* Line 2, round 3: F2 with constant sK23. */ \
+		sROUND2(A, B, C, D, F2,  9, in(15),  3); \
+		sROUND2(D, A, B, C, F2,  7, in( 5),  3); \
+		sROUND2(C, D, A, B, F2, 15, in( 1),  3); \
+		sROUND2(B, C, D, A, F2, 11, in( 3),  3); \
+		sROUND2(A, B, C, D, F2,  8, in( 7),  3); \
+		sROUND2(D, A, B, C, F2,  6, in(14),  3); \
+		sROUND2(C, D, A, B, F2,  6, in( 6),  3); \
+		sROUND2(B, C, D, A, F2, 14, in( 9),  3); \
+		sROUND2(A, B, C, D, F2, 12, in(11),  3); \
+		sROUND2(D, A, B, C, F2, 13, in( 8),  3); \
+		sROUND2(C, D, A, B, F2,  5, in(12),  3); \
+		sROUND2(B, C, D, A, F2, 14, in( 2),  3); \
+		sROUND2(A, B, C, D, F2, 13, in(10),  3); \
+		sROUND2(D, A, B, C, F2, 13, in( 0),  3); \
+		sROUND2(C, D, A, B, F2,  7, in( 4),  3); \
+		sROUND2(B, C, D, A, F2,  5, in(13),  3); \
+		/* Line 2, round 4: F1 with constant sK24. */ \
+		sROUND2(A, B, C, D, F1, 15, in( 8),  4); \
+		sROUND2(D, A, B, C, F1,  5, in( 6),  4); \
+		sROUND2(C, D, A, B, F1,  8, in( 4),  4); \
+		sROUND2(B, C, D, A, F1, 11, in( 1),  4); \
+		sROUND2(A, B, C, D, F1, 14, in( 3),  4); \
+		sROUND2(D, A, B, C, F1, 14, in(11),  4); \
+		sROUND2(C, D, A, B, F1,  6, in(15),  4); \
+		sROUND2(B, C, D, A, F1, 14, in( 0),  4); \
+		sROUND2(A, B, C, D, F1,  6, in( 5),  4); \
+		sROUND2(D, A, B, C, F1,  9, in(12),  4); \
+		sROUND2(C, D, A, B, F1, 12, in( 2),  4); \
+		sROUND2(B, C, D, A, F1,  9, in(13),  4); \
+		sROUND2(A, B, C, D, F1, 12, in( 9),  4); \
+		sROUND2(D, A, B, C, F1,  5, in( 7),  4); \
+		sROUND2(C, D, A, B, F1, 15, in(10),  4); \
+		sROUND2(B, C, D, A, F1,  8, in(14),  4); \
+		/* Fold both lines into h; tmp holds the new h[0] until the old h[0] has been read. */ \
+		tmp = SPH_T32((h)[1] + C1 + D2); \
+		(h)[1] = SPH_T32((h)[2] + D1 + A2); \
+		(h)[2] = SPH_T32((h)[3] + A1 + B2); \
+		(h)[3] = SPH_T32((h)[0] + B1 + C2); \
+		(h)[0] = tmp; \
+	} while (0)
+
+/*
+ * Apply the RIPEMD-128 compression function to one 64-byte block.
+ * data must be aligned for 32-bit access; only r[0..3] are read and updated.
+ */
+static void
+ripemd128_round(const unsigned char *data, sph_u32 r[5])
+{
+#if SPH_LITTLE_FAST
+	/* Decode each message word straight from data at the point of use. */
+#define RIPEMD128_IN(x)   sph_dec32le_aligned(data + (4 * (x)))
+#else
+	/* Decode the 16 message words once, up front. */
+	sph_u32 words[16];
+	const unsigned char *src = data;
+	int n;
+
+	for (n = 0; n < 16; n ++, src += 4)
+		words[n] = sph_dec32le_aligned(src);
+#define RIPEMD128_IN(x)   words[x]
+#endif
+	RIPEMD128_ROUND_BODY(RIPEMD128_IN, r);
+#undef RIPEMD128_IN
+}
+
+/* see sph_ripemd.h */
+void
+sph_ripemd128_init(void *cc)
+{
+	sph_ripemd128_context *ctx = cc;
+	/* Load as many IV words as val holds, then clear the count field(s). */
+	memcpy(ctx->val, IV, sizeof ctx->val);
+#if SPH_64
+	ctx->count = 0;
+#else
+	ctx->count_low = 0;
+	ctx->count_high = 0;
+#endif
+}
+
+#define RFUN   ripemd128_round
+#define HASH   ripemd128
+#define LE32   1
+#include "md_helper.c"
+#undef RFUN
+#undef HASH
+#undef LE32
+
+/* see sph_ripemd.h */
+void
+sph_ripemd128_close(void *cc, void *dst)
+{
+	ripemd128_close(cc, dst, 4);	/* presumably generated by the md_helper.c include; 4 looks like the output word count -- TODO confirm */
+	sph_ripemd128_init(cc);	/* put the context back into its initial state */
+}
+
+/* see sph_ripemd.h: one RIPEMD-128 compression of val[0..3] with the 16 words of msg used as-is */
+void
+sph_ripemd128_comp(const sph_u32 msg[16], sph_u32 val[4])
+{
+#define RIPEMD128_IN(x)   msg[x]
+	RIPEMD128_ROUND_BODY(RIPEMD128_IN, val);
+#undef RIPEMD128_IN
+}
+
+/* ===================================================================== */
+/*
+ * RIPEMD-160.
+ */
+
+/*
+ * Round constants for RIPEMD-160: K1n is used on line 1, K2n on line 2 (n = round).
+ */
+#define K11    SPH_C32(0x00000000)
+#define K12    SPH_C32(0x5A827999)
+#define K13    SPH_C32(0x6ED9EBA1)
+#define K14    SPH_C32(0x8F1BBCDC)
+#define K15    SPH_C32(0xA953FD4E)
+
+#define K21    SPH_C32(0x50A28BE6)
+#define K22    SPH_C32(0x5C4DD124)
+#define K23    SPH_C32(0x6D703EF3)
+#define K24    SPH_C32(0x7A6D76E9)
+#define K25    SPH_C32(0x00000000)
+/* One step: a = ROTL(a + f(b, c, d) + r + k, s) + e, and c is rotated left by 10 as a side effect. */
+#define RR(a, b, c, d, e, f, s, r, k)   do { \
+		a = SPH_T32(ROTL(SPH_T32(a + f(b, c, d) + r + k), s) + e); \
+		c = ROTL(c, 10); \
+	} while (0)
+/* Line-1 step: appends 1 to the register names and selects constant K1<k>. */
+#define ROUND1(a, b, c, d, e, f, s, r, k)  \
+	RR(a ## 1, b ## 1, c ## 1, d ## 1, e ## 1, f, s, r, K1 ## k)
+/* Line-2 step: appends 2 to the register names and selects constant K2<k>. */
+#define ROUND2(a, b, c, d, e, f, s, r, k)  \
+	RR(a ## 2, b ## 2, c ## 2, d ## 2, e ## 2, f, s, r, K2 ## k)
+
+/*
+ * This macro defines the body for a RIPEMD-160 compression function
+ * implementation. The "in" parameter should evaluate, when applied to a
+ * numerical input parameter from 0 to 15, to an expression which yields
+ * the corresponding input block. The "h" parameter should evaluate to
+ * an array or pointer expression designating the array of 5 words which
+ * contains the input and output of the compression function.
+ */
+
+#define RIPEMD160_ROUND_BODY(in, h)   do { \
+		sph_u32 A1, B1, C1, D1, E1; \
+		sph_u32 A2, B2, C2, D2, E2; \
+		sph_u32 tmp; \
+		/* Both lines start from the same chaining value. */ \
+		A1 = A2 = (h)[0]; \
+		B1 = B2 = (h)[1]; \
+		C1 = C2 = (h)[2]; \
+		D1 = D2 = (h)[3]; \
+		E1 = E2 = (h)[4]; \
+		/* Line 1, round 1: F1 with constant K11. */ \
+		ROUND1(A, B, C, D, E, F1, 11, in( 0),  1); \
+		ROUND1(E, A, B, C, D, F1, 14, in( 1),  1); \
+		ROUND1(D, E, A, B, C, F1, 15, in( 2),  1); \
+		ROUND1(C, D, E, A, B, F1, 12, in( 3),  1); \
+		ROUND1(B, C, D, E, A, F1,  5, in( 4),  1); \
+		ROUND1(A, B, C, D, E, F1,  8, in( 5),  1); \
+		ROUND1(E, A, B, C, D, F1,  7, in( 6),  1); \
+		ROUND1(D, E, A, B, C, F1,  9, in( 7),  1); \
+		ROUND1(C, D, E, A, B, F1, 11, in( 8),  1); \
+		ROUND1(B, C, D, E, A, F1, 13, in( 9),  1); \
+		ROUND1(A, B, C, D, E, F1, 14, in(10),  1); \
+		ROUND1(E, A, B, C, D, F1, 15, in(11),  1); \
+		ROUND1(D, E, A, B, C, F1,  6, in(12),  1); \
+		ROUND1(C, D, E, A, B, F1,  7, in(13),  1); \
+		ROUND1(B, C, D, E, A, F1,  9, in(14),  1); \
+		ROUND1(A, B, C, D, E, F1,  8, in(15),  1); \
+		/* Line 1, round 2: F2 with constant K12. */ \
+		ROUND1(E, A, B, C, D, F2,  7, in( 7),  2); \
+		ROUND1(D, E, A, B, C, F2,  6, in( 4),  2); \
+		ROUND1(C, D, E, A, B, F2,  8, in(13),  2); \
+		ROUND1(B, C, D, E, A, F2, 13, in( 1),  2); \
+		ROUND1(A, B, C, D, E, F2, 11, in(10),  2); \
+		ROUND1(E, A, B, C, D, F2,  9, in( 6),  2); \
+		ROUND1(D, E, A, B, C, F2,  7, in(15),  2); \
+		ROUND1(C, D, E, A, B, F2, 15, in( 3),  2); \
+		ROUND1(B, C, D, E, A, F2,  7, in(12),  2); \
+		ROUND1(A, B, C, D, E, F2, 12, in( 0),  2); \
+		ROUND1(E, A, B, C, D, F2, 15, in( 9),  2); \
+		ROUND1(D, E, A, B, C, F2,  9, in( 5),  2); \
+		ROUND1(C, D, E, A, B, F2, 11, in( 2),  2); \
+		ROUND1(B, C, D, E, A, F2,  7, in(14),  2); \
+		ROUND1(A, B, C, D, E, F2, 13, in(11),  2); \
+		ROUND1(E, A, B, C, D, F2, 12, in( 8),  2); \
+		/* Line 1, round 3: F3 with constant K13. */ \
+		ROUND1(D, E, A, B, C, F3, 11, in( 3),  3); \
+		ROUND1(C, D, E, A, B, F3, 13, in(10),  3); \
+		ROUND1(B, C, D, E, A, F3,  6, in(14),  3); \
+		ROUND1(A, B, C, D, E, F3,  7, in( 4),  3); \
+		ROUND1(E, A, B, C, D, F3, 14, in( 9),  3); \
+		ROUND1(D, E, A, B, C, F3,  9, in(15),  3); \
+		ROUND1(C, D, E, A, B, F3, 13, in( 8),  3); \
+		ROUND1(B, C, D, E, A, F3, 15, in( 1),  3); \
+		ROUND1(A, B, C, D, E, F3, 14, in( 2),  3); \
+		ROUND1(E, A, B, C, D, F3,  8, in( 7),  3); \
+		ROUND1(D, E, A, B, C, F3, 13, in( 0),  3); \
+		ROUND1(C, D, E, A, B, F3,  6, in( 6),  3); \
+		ROUND1(B, C, D, E, A, F3,  5, in(13),  3); \
+		ROUND1(A, B, C, D, E, F3, 12, in(11),  3); \
+		ROUND1(E, A, B, C, D, F3,  7, in( 5),  3); \
+		ROUND1(D, E, A, B, C, F3,  5, in(12),  3); \
+		/* Line 1, round 4: F4 with constant K14. */ \
+		ROUND1(C, D, E, A, B, F4, 11, in( 1),  4); \
+		ROUND1(B, C, D, E, A, F4, 12, in( 9),  4); \
+		ROUND1(A, B, C, D, E, F4, 14, in(11),  4); \
+		ROUND1(E, A, B, C, D, F4, 15, in(10),  4); \
+		ROUND1(D, E, A, B, C, F4, 14, in( 0),  4); \
+		ROUND1(C, D, E, A, B, F4, 15, in( 8),  4); \
+		ROUND1(B, C, D, E, A, F4,  9, in(12),  4); \
+		ROUND1(A, B, C, D, E, F4,  8, in( 4),  4); \
+		ROUND1(E, A, B, C, D, F4,  9, in(13),  4); \
+		ROUND1(D, E, A, B, C, F4, 14, in( 3),  4); \
+		ROUND1(C, D, E, A, B, F4,  5, in( 7),  4); \
+		ROUND1(B, C, D, E, A, F4,  6, in(15),  4); \
+		ROUND1(A, B, C, D, E, F4,  8, in(14),  4); \
+		ROUND1(E, A, B, C, D, F4,  6, in( 5),  4); \
+		ROUND1(D, E, A, B, C, F4,  5, in( 6),  4); \
+		ROUND1(C, D, E, A, B, F4, 12, in( 2),  4); \
+		/* Line 1, round 5: F5 with constant K15. */ \
+		ROUND1(B, C, D, E, A, F5,  9, in( 4),  5); \
+		ROUND1(A, B, C, D, E, F5, 15, in( 0),  5); \
+		ROUND1(E, A, B, C, D, F5,  5, in( 5),  5); \
+		ROUND1(D, E, A, B, C, F5, 11, in( 9),  5); \
+		ROUND1(C, D, E, A, B, F5,  6, in( 7),  5); \
+		ROUND1(B, C, D, E, A, F5,  8, in(12),  5); \
+		ROUND1(A, B, C, D, E, F5, 13, in( 2),  5); \
+		ROUND1(E, A, B, C, D, F5, 12, in(10),  5); \
+		ROUND1(D, E, A, B, C, F5,  5, in(14),  5); \
+		ROUND1(C, D, E, A, B, F5, 12, in( 1),  5); \
+		ROUND1(B, C, D, E, A, F5, 13, in( 3),  5); \
+		ROUND1(A, B, C, D, E, F5, 14, in( 8),  5); \
+		ROUND1(E, A, B, C, D, F5, 11, in(11),  5); \
+		ROUND1(D, E, A, B, C, F5,  8, in( 6),  5); \
+		ROUND1(C, D, E, A, B, F5,  5, in(15),  5); \
+		ROUND1(B, C, D, E, A, F5,  6, in(13),  5); \
+		/* Line 2, round 1: F5 with constant K21 (functions run in reverse order on this line). */ \
+		ROUND2(A, B, C, D, E, F5,  8, in( 5),  1); \
+		ROUND2(E, A, B, C, D, F5,  9, in(14),  1); \
+		ROUND2(D, E, A, B, C, F5,  9, in( 7),  1); \
+		ROUND2(C, D, E, A, B, F5, 11, in( 0),  1); \
+		ROUND2(B, C, D, E, A, F5, 13, in( 9),  1); \
+		ROUND2(A, B, C, D, E, F5, 15, in( 2),  1); \
+		ROUND2(E, A, B, C, D, F5, 15, in(11),  1); \
+		ROUND2(D, E, A, B, C, F5,  5, in( 4),  1); \
+		ROUND2(C, D, E, A, B, F5,  7, in(13),  1); \
+		ROUND2(B, C, D, E, A, F5,  7, in( 6),  1); \
+		ROUND2(A, B, C, D, E, F5,  8, in(15),  1); \
+		ROUND2(E, A, B, C, D, F5, 11, in( 8),  1); \
+		ROUND2(D, E, A, B, C, F5, 14, in( 1),  1); \
+		ROUND2(C, D, E, A, B, F5, 14, in(10),  1); \
+		ROUND2(B, C, D, E, A, F5, 12, in( 3),  1); \
+		ROUND2(A, B, C, D, E, F5,  6, in(12),  1); \
+		/* Line 2, round 2: F4 with constant K22. */ \
+		ROUND2(E, A, B, C, D, F4,  9, in( 6),  2); \
+		ROUND2(D, E, A, B, C, F4, 13, in(11),  2); \
+		ROUND2(C, D, E, A, B, F4, 15, in( 3),  2); \
+		ROUND2(B, C, D, E, A, F4,  7, in( 7),  2); \
+		ROUND2(A, B, C, D, E, F4, 12, in( 0),  2); \
+		ROUND2(E, A, B, C, D, F4,  8, in(13),  2); \
+		ROUND2(D, E, A, B, C, F4,  9, in( 5),  2); \
+		ROUND2(C, D, E, A, B, F4, 11, in(10),  2); \
+		ROUND2(B, C, D, E, A, F4,  7, in(14),  2); \
+		ROUND2(A, B, C, D, E, F4,  7, in(15),  2); \
+		ROUND2(E, A, B, C, D, F4, 12, in( 8),  2); \
+		ROUND2(D, E, A, B, C, F4,  7, in(12),  2); \
+		ROUND2(C, D, E, A, B, F4,  6, in( 4),  2); \
+		ROUND2(B, C, D, E, A, F4, 15, in( 9),  2); \
+		ROUND2(A, B, C, D, E, F4, 13, in( 1),  2); \
+		ROUND2(E, A, B, C, D, F4, 11, in( 2),  2); \
+		/* Line 2, round 3: F3 with constant K23. */ \
+		ROUND2(D, E, A, B, C, F3,  9, in(15),  3); \
+		ROUND2(C, D, E, A, B, F3,  7, in( 5),  3); \
+		ROUND2(B, C, D, E, A, F3, 15, in( 1),  3); \
+		ROUND2(A, B, C, D, E, F3, 11, in( 3),  3); \
+		ROUND2(E, A, B, C, D, F3,  8, in( 7),  3); \
+		ROUND2(D, E, A, B, C, F3,  6, in(14),  3); \
+		ROUND2(C, D, E, A, B, F3,  6, in( 6),  3); \
+		ROUND2(B, C, D, E, A, F3, 14, in( 9),  3); \
+		ROUND2(A, B, C, D, E, F3, 12, in(11),  3); \
+		ROUND2(E, A, B, C, D, F3, 13, in( 8),  3); \
+		ROUND2(D, E, A, B, C, F3,  5, in(12),  3); \
+		ROUND2(C, D, E, A, B, F3, 14, in( 2),  3); \
+		ROUND2(B, C, D, E, A, F3, 13, in(10),  3); \
+		ROUND2(A, B, C, D, E, F3, 13, in( 0),  3); \
+		ROUND2(E, A, B, C, D, F3,  7, in( 4),  3); \
+		ROUND2(D, E, A, B, C, F3,  5, in(13),  3); \
+		/* Line 2, round 4: F2 with constant K24. */ \
+		ROUND2(C, D, E, A, B, F2, 15, in( 8),  4); \
+		ROUND2(B, C, D, E, A, F2,  5, in( 6),  4); \
+		ROUND2(A, B, C, D, E, F2,  8, in( 4),  4); \
+		ROUND2(E, A, B, C, D, F2, 11, in( 1),  4); \
+		ROUND2(D, E, A, B, C, F2, 14, in( 3),  4); \
+		ROUND2(C, D, E, A, B, F2, 14, in(11),  4); \
+		ROUND2(B, C, D, E, A, F2,  6, in(15),  4); \
+		ROUND2(A, B, C, D, E, F2, 14, in( 0),  4); \
+		ROUND2(E, A, B, C, D, F2,  6, in( 5),  4); \
+		ROUND2(D, E, A, B, C, F2,  9, in(12),  4); \
+		ROUND2(C, D, E, A, B, F2, 12, in( 2),  4); \
+		ROUND2(B, C, D, E, A, F2,  9, in(13),  4); \
+		ROUND2(A, B, C, D, E, F2, 12, in( 9),  4); \
+		ROUND2(E, A, B, C, D, F2,  5, in( 7),  4); \
+		ROUND2(D, E, A, B, C, F2, 15, in(10),  4); \
+		ROUND2(C, D, E, A, B, F2,  8, in(14),  4); \
+		/* Line 2, round 5: F1 with constant K25. */ \
+		ROUND2(B, C, D, E, A, F1,  8, in(12),  5); \
+		ROUND2(A, B, C, D, E, F1,  5, in(15),  5); \
+		ROUND2(E, A, B, C, D, F1, 12, in(10),  5); \
+		ROUND2(D, E, A, B, C, F1,  9, in( 4),  5); \
+		ROUND2(C, D, E, A, B, F1, 12, in( 1),  5); \
+		ROUND2(B, C, D, E, A, F1,  5, in( 5),  5); \
+		ROUND2(A, B, C, D, E, F1, 14, in( 8),  5); \
+		ROUND2(E, A, B, C, D, F1,  6, in( 7),  5); \
+		ROUND2(D, E, A, B, C, F1,  8, in( 6),  5); \
+		ROUND2(C, D, E, A, B, F1, 13, in( 2),  5); \
+		ROUND2(B, C, D, E, A, F1,  6, in(13),  5); \
+		ROUND2(A, B, C, D, E, F1,  5, in(14),  5); \
+		ROUND2(E, A, B, C, D, F1, 15, in( 0),  5); \
+		ROUND2(D, E, A, B, C, F1, 13, in( 3),  5); \
+		ROUND2(C, D, E, A, B, F1, 11, in( 9),  5); \
+		ROUND2(B, C, D, E, A, F1, 11, in(11),  5); \
+		/* Fold both lines into h; tmp holds the new h[0] until the old h[0] has been read. */ \
+		tmp = SPH_T32((h)[1] + C1 + D2); \
+		(h)[1] = SPH_T32((h)[2] + D1 + E2); \
+		(h)[2] = SPH_T32((h)[3] + E1 + A2); \
+		(h)[3] = SPH_T32((h)[4] + A1 + B2); \
+		(h)[4] = SPH_T32((h)[0] + B1 + C2); \
+		(h)[0] = tmp; \
+	} while (0)
+
+/*
+ * Apply the RIPEMD-160 compression function to one 64-byte block.
+ * data must be aligned for 32-bit access; r[0..4] are read and updated.
+ */
+static void
+ripemd160_round(const unsigned char *data, sph_u32 r[5])
+{
+#if SPH_LITTLE_FAST
+	/* Decode each message word straight from data at the point of use. */
+#define RIPEMD160_IN(x)   sph_dec32le_aligned(data + (4 * (x)))
+#else
+	/* Decode the 16 message words once, up front. */
+	sph_u32 words[16];
+	const unsigned char *src = data;
+	int n;
+
+	for (n = 0; n < 16; n ++, src += 4)
+		words[n] = sph_dec32le_aligned(src);
+#define RIPEMD160_IN(x)   words[x]
+#endif
+	RIPEMD160_ROUND_BODY(RIPEMD160_IN, r);
+#undef RIPEMD160_IN
+}
+
+/* see sph_ripemd.h */
+void
+sph_ripemd160_init(void *cc)
+{
+	sph_ripemd160_context *ctx = cc;
+	/* Load as many IV words as val holds, then clear the count field(s). */
+	memcpy(ctx->val, IV, sizeof ctx->val);
+#if SPH_64
+	ctx->count = 0;
+#else
+	ctx->count_low = 0;
+	ctx->count_high = 0;
+#endif
+}
+
+#define RFUN   ripemd160_round
+#define HASH   ripemd160
+#define LE32   1
+#include "md_helper.c"
+#undef RFUN
+#undef HASH
+#undef LE32
+
+/* see sph_ripemd.h */
+void
+sph_ripemd160_close(void *cc, void *dst)
+{
+	ripemd160_close(cc, dst, 5);	/* presumably generated by the md_helper.c include; 5 looks like the output word count -- TODO confirm */
+	sph_ripemd160_init(cc);	/* put the context back into its initial state */
+}
+
+/* see sph_ripemd.h: one RIPEMD-160 compression of val[0..4] with the 16 words of msg used as-is */
+void
+sph_ripemd160_comp(const sph_u32 msg[16], sph_u32 val[5])
+{
+#define RIPEMD160_IN(x)   msg[x]
+	RIPEMD160_ROUND_BODY(RIPEMD160_IN, val);
+#undef RIPEMD160_IN
+}
diff --git a/sph/sha2.c b/sph/sha2.c
new file mode 100644
index 0000000000..d13a49514b
--- /dev/null
+++ b/sph/sha2.c
@@ -0,0 +1,630 @@
+/*
+ * Copyright 2011 ArtForz
+ * Copyright 2011-2013 pooler
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.  See COPYING for more details.
+ */
+
+#include "cpuminer-config.h"
+#include "miner.h"
+
+#include <string.h>
+#include <inttypes.h>
+
+#if defined(__arm__) && defined(__APCS_32__)
+#define EXTERN_SHA256
+#endif
+
+static const uint32_t sha256_h[8] = {
+	0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
+	0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
+};
+
+static const uint32_t sha256_k[64] = {
+	0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
+	0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
+	0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
+	0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
+	0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
+	0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
+	0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
+	0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
+	0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
+	0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
+	0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
+	0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
+	0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
+	0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
+	0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
+	0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+};
+
+/* Load the eight initial words of sha256_h into state[0..7]. */
+void sha256_init(uint32_t *state)
+{
+	memcpy(state, sha256_h, sizeof(sha256_h));
+}
+
+/*
+ * Elementary functions used by SHA256.
+ *
+ * Arguments are expected to be uint32_t expressions. Every parameter is
+ * parenthesized so that compound arguments (e.g. "a | b" or "n + 1")
+ * expand as intended. Arguments are still evaluated more than once, so
+ * they must not have side effects.
+ */
+#define Ch(x, y, z)     (((x) & ((y) ^ (z))) ^ (z))
+#define Maj(x, y, z)    (((x) & ((y) | (z))) | ((y) & (z)))
+#define ROTR(x, n)      (((x) >> (n)) | ((x) << (32 - (n))))
+#define S0(x)           (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
+#define S1(x)           (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))
+#define s0(x)           (ROTR(x, 7) ^ ROTR(x, 18) ^ ((x) >> 3))
+#define s1(x)           (ROTR(x, 17) ^ ROTR(x, 19) ^ ((x) >> 10))
+
+/*
+ * SHA256 round function.
+ * Writes the temporaries t0 and t1, which the calling scope must declare;
+ * d and h must be lvalues and are updated in place. k is the sum of the
+ * schedule word and the round constant.
+ */
+#define RND(a, b, c, d, e, f, g, h, k) \
+	do { \
+		t0 = (h) + S1(e) + Ch(e, f, g) + (k); \
+		t1 = S0(a) + Maj(a, b, c); \
+		(d) += t0; \
+		(h)  = t0 + t1; \
+	} while (0)
+
+/*
+ * Adjusted round function for rotating state: instead of shifting the
+ * eight working variables after each round, round i addresses them as
+ * S[(64 - i) % 8] .. S[(71 - i) % 8].
+ */
+#define RNDr(S, W, i) \
+	RND((S)[(64 - (i)) % 8], (S)[(65 - (i)) % 8], \
+	    (S)[(66 - (i)) % 8], (S)[(67 - (i)) % 8], \
+	    (S)[(68 - (i)) % 8], (S)[(69 - (i)) % 8], \
+	    (S)[(70 - (i)) % 8], (S)[(71 - (i)) % 8], \
+	    (W)[i] + sha256_k[i])
+
+#ifndef EXTERN_SHA256
+
+/*
+ * SHA256 block compression function.  The 256-bit state is transformed via
+ * the 512-bit input block to produce a new state.
+ *
+ * state: eight 32-bit words, read and then updated in place.
+ * block: sixteen 32-bit input words.
+ * swap:  when non-zero each input word goes through swab32() before
+ *        use; when zero the block is copied unchanged.
+ */
+void sha256_transform(uint32_t *state, const uint32_t *block, int swap)
+{
+	uint32_t W[64];
+	uint32_t S[8];
+	uint32_t t0, t1;	/* scratch written by the RND macro */
+	int i;
+
+	/* 1. Prepare message schedule W. */
+	if (swap) {
+		for (i = 0; i < 16; i++)
+			W[i] = swab32(block[i]);
+	} else
+		memcpy(W, block, 64);
+	/* Two schedule words per iteration. */
+	for (i = 16; i < 64; i += 2) {
+		W[i]   = s1(W[i - 2]) + W[i - 7] + s0(W[i - 15]) + W[i - 16];
+		W[i+1] = s1(W[i - 1]) + W[i - 6] + s0(W[i - 14]) + W[i - 15];
+	}
+
+	/* 2. Initialize working variables. */
+	memcpy(S, state, 32);
+
+	/* 3. Mix. */
+	/* The 64 rounds are unrolled with a literal round index each. */
+	RNDr(S, W,  0);
+	RNDr(S, W,  1);
+	RNDr(S, W,  2);
+	RNDr(S, W,  3);
+	RNDr(S, W,  4);
+	RNDr(S, W,  5);
+	RNDr(S, W,  6);
+	RNDr(S, W,  7);
+	RNDr(S, W,  8);
+	RNDr(S, W,  9);
+	RNDr(S, W, 10);
+	RNDr(S, W, 11);
+	RNDr(S, W, 12);
+	RNDr(S, W, 13);
+	RNDr(S, W, 14);
+	RNDr(S, W, 15);
+	RNDr(S, W, 16);
+	RNDr(S, W, 17);
+	RNDr(S, W, 18);
+	RNDr(S, W, 19);
+	RNDr(S, W, 20);
+	RNDr(S, W, 21);
+	RNDr(S, W, 22);
+	RNDr(S, W, 23);
+	RNDr(S, W, 24);
+	RNDr(S, W, 25);
+	RNDr(S, W, 26);
+	RNDr(S, W, 27);
+	RNDr(S, W, 28);
+	RNDr(S, W, 29);
+	RNDr(S, W, 30);
+	RNDr(S, W, 31);
+	RNDr(S, W, 32);
+	RNDr(S, W, 33);
+	RNDr(S, W, 34);
+	RNDr(S, W, 35);
+	RNDr(S, W, 36);
+	RNDr(S, W, 37);
+	RNDr(S, W, 38);
+	RNDr(S, W, 39);
+	RNDr(S, W, 40);
+	RNDr(S, W, 41);
+	RNDr(S, W, 42);
+	RNDr(S, W, 43);
+	RNDr(S, W, 44);
+	RNDr(S, W, 45);
+	RNDr(S, W, 46);
+	RNDr(S, W, 47);
+	RNDr(S, W, 48);
+	RNDr(S, W, 49);
+	RNDr(S, W, 50);
+	RNDr(S, W, 51);
+	RNDr(S, W, 52);
+	RNDr(S, W, 53);
+	RNDr(S, W, 54);
+	RNDr(S, W, 55);
+	RNDr(S, W, 56);
+	RNDr(S, W, 57);
+	RNDr(S, W, 58);
+	RNDr(S, W, 59);
+	RNDr(S, W, 60);
+	RNDr(S, W, 61);
+	RNDr(S, W, 62);
+	RNDr(S, W, 63);
+
+	/* 4. Mix local working variables into global state */
+	for (i = 0; i < 8; i++)
+		state[i] += S[i];
+}
+
+#endif /* EXTERN_SHA256 */
+
+
+/*
+ * Template for the second-pass block of sha256d. The code in this file
+ * only uses words 8..15, placed after a 32-byte digest held in words
+ * 0..7: 0x80000000 is the padding marker that follows the 32-byte
+ * message and 0x00000100 is its length in bits (256).
+ */
+static const uint32_t sha256d_hash1[16] = {
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x80000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000100
+};
+
+/*
+ * Runs SHA-256 over the two 16-word blocks at data[0..15] and
+ * data[16..31], hashes the resulting digest a second time, and stores
+ * the result in hash[0..7] with every word passed through swab32().
+ * The input words are handed to sha256_transform() without swapping.
+ */
+static void sha256d_80_swap(uint32_t *hash, const uint32_t *data)
+{
+	uint32_t inner[16];
+	uint32_t *word;
+
+	/* First pass: compress both input blocks from the initial state. */
+	sha256_init(inner);
+	sha256_transform(inner, &data[0], 0);
+	sha256_transform(inner, &data[16], 0);
+
+	/* Second pass: inner[0..7] is the digest, inner[8..15] the padding. */
+	memcpy(&inner[8], &sha256d_hash1[8], 8 * sizeof(uint32_t));
+	sha256_init(hash);
+	sha256_transform(hash, inner, 0);
+
+	for (word = hash; word != hash + 8; word++)
+		*word = swab32(*word);
+}
+
+/*
+ * Double SHA-256: SHA-256 of the SHA-256 digest of a byte string.
+ *
+ * hash: receives the 32-byte result (eight words written by be32enc()).
+ * data: input bytes; only dereferenced when len > 0.
+ * len:  input length in bytes, expected to be non-negative.
+ *
+ * The message length is stored as a full 64-bit bit count (T[14] high
+ * word, T[15] low word), computed without signed overflow, so inputs of
+ * 2^28 bytes and more are padded correctly.
+ */
+void sha256d(unsigned char *hash, const unsigned char *data, int len)
+{
+	uint32_t S[16], T[16];
+	const uint32_t nbytes = (uint32_t)len;
+	int i, r, chunk;
+
+	sha256_init(S);
+	/*
+	 * r is the number of message bytes not yet absorbed. The loop runs
+	 * while r > -9 so that a padding-only block is emitted when the last
+	 * data block cannot hold both the 0x80 marker and the 8-byte length.
+	 */
+	for (r = len; r > -9; r -= 64) {
+		if (r < 64)
+			memset(T, 0, 64);
+		chunk = r > 64 ? 64 : (r < 0 ? 0 : r);
+		/* Skip the copy (and the pointer arithmetic) when nothing is left. */
+		if (chunk > 0)
+			memcpy(T, data + (len - r), chunk);
+		if (r >= 0 && r < 64)
+			((unsigned char *)T)[r] = 0x80;
+		for (i = 0; i < 16; i++)
+			T[i] = be32dec(T + i);
+		if (r < 56) {
+			/* Bit length = 8 * len, split across the two length words. */
+			T[14] = nbytes >> 29;
+			T[15] = nbytes << 3;
+		}
+		sha256_transform(S, T, 0);
+	}
+	/* Second pass over the 32-byte digest with its fixed padding. */
+	memcpy(S + 8, sha256d_hash1 + 8, 32);
+	sha256_init(T);
+	sha256_transform(T, S, 0);
+	for (i = 0; i < 8; i++)
+		be32enc((uint32_t *)hash + i, T[i]);
+}
+
+/*
+ * Precomputes part of the message schedule W[16..31] from W[0..15].
+ *
+ * Compared with the full recurrence used in sha256_transform(), every
+ * term that depends on W[3] (directly, or through an earlier schedule
+ * word that does) is left out; the blank columns below mark the omitted
+ * terms. sha256d_ms() later completes W[18..20], W[22..24] and
+ * W[30..31] with "+=" and assigns W[21] and W[25..29] itself.
+ */
+static inline void sha256d_preextend(uint32_t *W)
+{
+	W[16] = s1(W[14]) + W[ 9] + s0(W[ 1]) + W[ 0];
+	W[17] = s1(W[15]) + W[10] + s0(W[ 2]) + W[ 1];
+	W[18] = s1(W[16]) + W[11]             + W[ 2];
+	W[19] = s1(W[17]) + W[12] + s0(W[ 4]);
+	W[20] =             W[13] + s0(W[ 5]) + W[ 4];
+	W[21] =             W[14] + s0(W[ 6]) + W[ 5];
+	W[22] =             W[15] + s0(W[ 7]) + W[ 6];
+	W[23] =             W[16] + s0(W[ 8]) + W[ 7];
+	W[24] =             W[17] + s0(W[ 9]) + W[ 8];
+	W[25] =                     s0(W[10]) + W[ 9];
+	W[26] =                     s0(W[11]) + W[10];
+	W[27] =                     s0(W[12]) + W[11];
+	W[28] =                     s0(W[13]) + W[12];
+	W[29] =                     s0(W[14]) + W[13];
+	W[30] =                     s0(W[15]) + W[14];
+	W[31] =                     s0(W[16]) + W[15];
+}
+
+/*
+ * Runs SHA-256 rounds 0, 1 and 2 on the state S in place. These rounds
+ * read only W[0], W[1] and W[2]; sha256d_ms() continues from round 3.
+ */
+static inline void sha256d_prehash(uint32_t *S, const uint32_t *W)
+{
+	uint32_t t0, t1;	/* scratch written by the RND macro */
+	RNDr(S, W, 0);
+	RNDr(S, W, 1);
+	RNDr(S, W, 2);
+}
+
+#ifdef EXTERN_SHA256
+
+void sha256d_ms(uint32_t *hash, uint32_t *W,
+	const uint32_t *midstate, const uint32_t *prehash);
+
+#else
+
+/*
+ * Per-nonce part of the double SHA-256 used by scanhash_sha256d().
+ *
+ * hash:     output state of the second SHA-256.
+ *           NOTE(review): rounds 57..60 below are reduced to their
+ *           "d += t0" half and only hash[7] gets its initial value added
+ *           back, so only hash[7] looks like a finished digest word (the
+ *           caller in this file reads hash[7] alone) -- confirm before
+ *           relying on other words.
+ * W:        64-word schedule buffer prepared by sha256d_preextend(), with
+ *           W[3] set by the caller. W[18..20], W[22..24] and W[30..31]
+ *           are restored to their incoming values before returning, so
+ *           the buffer can be reused with a different W[3].
+ * midstate: state added to the first-hash result after its 64 rounds.
+ * prehash:  state after rounds 0..2 (see sha256d_prehash()).
+ */
+static inline void sha256d_ms(uint32_t *hash, uint32_t *W,
+	const uint32_t *midstate, const uint32_t *prehash)
+{
+	uint32_t S[64];
+	uint32_t t0, t1;
+	int i;
+
+	/* Save the preextended words that are modified with "+=" below. */
+	S[18] = W[18];
+	S[19] = W[19];
+	S[20] = W[20];
+	S[22] = W[22];
+	S[23] = W[23];
+	S[24] = W[24];
+	S[30] = W[30];
+	S[31] = W[31];
+
+	/*
+	 * Complete the schedule with the terms sha256d_preextend() left out.
+	 * NOTE(review): W[21] and W[25..29] are assigned without the value
+	 * sha256d_preextend() stored there; this matches the full recurrence
+	 * only if those dropped terms are zero -- presumably guaranteed by
+	 * the layout of the block being hashed; confirm against the caller.
+	 */
+	W[18] += s0(W[3]);
+	W[19] += W[3];
+	W[20] += s1(W[18]);
+	W[21]  = s1(W[19]);
+	W[22] += s1(W[20]);
+	W[23] += s1(W[21]);
+	W[24] += s1(W[22]);
+	W[25]  = s1(W[23]) + W[18];
+	W[26]  = s1(W[24]) + W[19];
+	W[27]  = s1(W[25]) + W[20];
+	W[28]  = s1(W[26]) + W[21];
+	W[29]  = s1(W[27]) + W[22];
+	W[30] += s1(W[28]) + W[23];
+	W[31] += s1(W[29]) + W[24];
+	for (i = 32; i < 64; i += 2) {
+		W[i]   = s1(W[i - 2]) + W[i - 7] + s0(W[i - 15]) + W[i - 16];
+		W[i+1] = s1(W[i - 1]) + W[i - 6] + s0(W[i - 14]) + W[i - 15];
+	}
+
+	/* First hash: resume from the state after rounds 0..2. */
+	memcpy(S, prehash, 32);
+
+	RNDr(S, W,  3);
+	RNDr(S, W,  4);
+	RNDr(S, W,  5);
+	RNDr(S, W,  6);
+	RNDr(S, W,  7);
+	RNDr(S, W,  8);
+	RNDr(S, W,  9);
+	RNDr(S, W, 10);
+	RNDr(S, W, 11);
+	RNDr(S, W, 12);
+	RNDr(S, W, 13);
+	RNDr(S, W, 14);
+	RNDr(S, W, 15);
+	RNDr(S, W, 16);
+	RNDr(S, W, 17);
+	RNDr(S, W, 18);
+	RNDr(S, W, 19);
+	RNDr(S, W, 20);
+	RNDr(S, W, 21);
+	RNDr(S, W, 22);
+	RNDr(S, W, 23);
+	RNDr(S, W, 24);
+	RNDr(S, W, 25);
+	RNDr(S, W, 26);
+	RNDr(S, W, 27);
+	RNDr(S, W, 28);
+	RNDr(S, W, 29);
+	RNDr(S, W, 30);
+	RNDr(S, W, 31);
+	RNDr(S, W, 32);
+	RNDr(S, W, 33);
+	RNDr(S, W, 34);
+	RNDr(S, W, 35);
+	RNDr(S, W, 36);
+	RNDr(S, W, 37);
+	RNDr(S, W, 38);
+	RNDr(S, W, 39);
+	RNDr(S, W, 40);
+	RNDr(S, W, 41);
+	RNDr(S, W, 42);
+	RNDr(S, W, 43);
+	RNDr(S, W, 44);
+	RNDr(S, W, 45);
+	RNDr(S, W, 46);
+	RNDr(S, W, 47);
+	RNDr(S, W, 48);
+	RNDr(S, W, 49);
+	RNDr(S, W, 50);
+	RNDr(S, W, 51);
+	RNDr(S, W, 52);
+	RNDr(S, W, 53);
+	RNDr(S, W, 54);
+	RNDr(S, W, 55);
+	RNDr(S, W, 56);
+	RNDr(S, W, 57);
+	RNDr(S, W, 58);
+	RNDr(S, W, 59);
+	RNDr(S, W, 60);
+	RNDr(S, W, 61);
+	RNDr(S, W, 62);
+	RNDr(S, W, 63);
+
+	/* Feed-forward: S[0..7] becomes the first-hash digest. */
+	for (i = 0; i < 8; i++)
+		S[i] += midstate[i];
+	
+	/* Put back the words saved at the top, leaving W reusable. */
+	W[18] = S[18];
+	W[19] = S[19];
+	W[20] = S[20];
+	W[22] = S[22];
+	W[23] = S[23];
+	W[24] = S[24];
+	W[30] = S[30];
+	W[31] = S[31];
+	
+	/*
+	 * Second hash: S[0..7] is the message, S[8..15] the fixed padding.
+	 * S[16..31] are written out with the padding words taken directly
+	 * from sha256d_hash1.
+	 */
+	memcpy(S + 8, sha256d_hash1 + 8, 32);
+	S[16] = s1(sha256d_hash1[14]) + sha256d_hash1[ 9] + s0(S[ 1]) + S[ 0];
+	S[17] = s1(sha256d_hash1[15]) + sha256d_hash1[10] + s0(S[ 2]) + S[ 1];
+	S[18] = s1(S[16]) + sha256d_hash1[11] + s0(S[ 3]) + S[ 2];
+	S[19] = s1(S[17]) + sha256d_hash1[12] + s0(S[ 4]) + S[ 3];
+	S[20] = s1(S[18]) + sha256d_hash1[13] + s0(S[ 5]) + S[ 4];
+	S[21] = s1(S[19]) + sha256d_hash1[14] + s0(S[ 6]) + S[ 5];
+	S[22] = s1(S[20]) + sha256d_hash1[15] + s0(S[ 7]) + S[ 6];
+	S[23] = s1(S[21]) + S[16] + s0(sha256d_hash1[ 8]) + S[ 7];
+	S[24] = s1(S[22]) + S[17] + s0(sha256d_hash1[ 9]) + sha256d_hash1[ 8];
+	S[25] = s1(S[23]) + S[18] + s0(sha256d_hash1[10]) + sha256d_hash1[ 9];
+	S[26] = s1(S[24]) + S[19] + s0(sha256d_hash1[11]) + sha256d_hash1[10];
+	S[27] = s1(S[25]) + S[20] + s0(sha256d_hash1[12]) + sha256d_hash1[11];
+	S[28] = s1(S[26]) + S[21] + s0(sha256d_hash1[13]) + sha256d_hash1[12];
+	S[29] = s1(S[27]) + S[22] + s0(sha256d_hash1[14]) + sha256d_hash1[13];
+	S[30] = s1(S[28]) + S[23] + s0(sha256d_hash1[15]) + sha256d_hash1[14];
+	S[31] = s1(S[29]) + S[24] + s0(S[16])             + sha256d_hash1[15];
+	/* Schedule words 61..63 are never computed: nothing below reads them. */
+	for (i = 32; i < 60; i += 2) {
+		S[i]   = s1(S[i - 2]) + S[i - 7] + s0(S[i - 15]) + S[i - 16];
+		S[i+1] = s1(S[i - 1]) + S[i - 6] + s0(S[i - 14]) + S[i - 15];
+	}
+	S[60] = s1(S[58]) + S[53] + s0(S[45]) + S[44];
+
+	sha256_init(hash);
+
+	RNDr(hash, S,  0);
+	RNDr(hash, S,  1);
+	RNDr(hash, S,  2);
+	RNDr(hash, S,  3);
+	RNDr(hash, S,  4);
+	RNDr(hash, S,  5);
+	RNDr(hash, S,  6);
+	RNDr(hash, S,  7);
+	RNDr(hash, S,  8);
+	RNDr(hash, S,  9);
+	RNDr(hash, S, 10);
+	RNDr(hash, S, 11);
+	RNDr(hash, S, 12);
+	RNDr(hash, S, 13);
+	RNDr(hash, S, 14);
+	RNDr(hash, S, 15);
+	RNDr(hash, S, 16);
+	RNDr(hash, S, 17);
+	RNDr(hash, S, 18);
+	RNDr(hash, S, 19);
+	RNDr(hash, S, 20);
+	RNDr(hash, S, 21);
+	RNDr(hash, S, 22);
+	RNDr(hash, S, 23);
+	RNDr(hash, S, 24);
+	RNDr(hash, S, 25);
+	RNDr(hash, S, 26);
+	RNDr(hash, S, 27);
+	RNDr(hash, S, 28);
+	RNDr(hash, S, 29);
+	RNDr(hash, S, 30);
+	RNDr(hash, S, 31);
+	RNDr(hash, S, 32);
+	RNDr(hash, S, 33);
+	RNDr(hash, S, 34);
+	RNDr(hash, S, 35);
+	RNDr(hash, S, 36);
+	RNDr(hash, S, 37);
+	RNDr(hash, S, 38);
+	RNDr(hash, S, 39);
+	RNDr(hash, S, 40);
+	RNDr(hash, S, 41);
+	RNDr(hash, S, 42);
+	RNDr(hash, S, 43);
+	RNDr(hash, S, 44);
+	RNDr(hash, S, 45);
+	RNDr(hash, S, 46);
+	RNDr(hash, S, 47);
+	RNDr(hash, S, 48);
+	RNDr(hash, S, 49);
+	RNDr(hash, S, 50);
+	RNDr(hash, S, 51);
+	RNDr(hash, S, 52);
+	RNDr(hash, S, 53);
+	RNDr(hash, S, 54);
+	RNDr(hash, S, 55);
+	RNDr(hash, S, 56);
+	
+	/*
+	 * Rounds 57..60, keeping only the "d += t0" update of RND (the
+	 * "h = t0 + t1" half is skipped); the last line also adds
+	 * sha256_h[7] to hash[7].
+	 */
+	hash[2] += hash[6] + S1(hash[3]) + Ch(hash[3], hash[4], hash[5])
+	         + S[57] + sha256_k[57];
+	hash[1] += hash[5] + S1(hash[2]) + Ch(hash[2], hash[3], hash[4])
+	         + S[58] + sha256_k[58];
+	hash[0] += hash[4] + S1(hash[1]) + Ch(hash[1], hash[2], hash[3])
+	         + S[59] + sha256_k[59];
+	hash[7] += hash[3] + S1(hash[0]) + Ch(hash[0], hash[1], hash[2])
+	         + S[60] + sha256_k[60]
+	         + sha256_h[7];
+}
+
+#endif /* EXTERN_SHA256 */
+
+#ifdef HAVE_SHA256_4WAY
+
+void sha256d_ms_4way(uint32_t *hash,  uint32_t *data,
+	const uint32_t *midstate, const uint32_t *prehash);
+
+/*
+ * Nonce scan that tests four consecutive nonces per iteration through
+ * sha256d_ms_4way().
+ *
+ * pdata:       32-bit input words; pdata[19] is the first nonce to try
+ *              and is updated on return.
+ * ptarget:     target words; ptarget[7] is used for the quick pre-check.
+ * max_nonce:   scanning stops once the nonce counter reaches this value
+ *              (it may pass it by up to 3, nonces being taken in fours).
+ * hashes_done: receives the number of nonces consumed.
+ *
+ * Returns 1 with pdata[19] set to the winning nonce when fulltest()
+ * accepts a hash, otherwise 0 with pdata[19] set to the last nonce tried.
+ */
+static inline int scanhash_sha256d_4way(int thr_id, uint32_t *pdata,
+	const uint32_t *ptarget, uint32_t max_nonce, unsigned long *hashes_done)
+{
+	uint32_t data[4 * 64] __attribute__((aligned(128)));
+	uint32_t hash[4 * 8] __attribute__((aligned(32)));
+	uint32_t midstate[4 * 8] __attribute__((aligned(32)));
+	uint32_t prehash[4 * 8] __attribute__((aligned(32)));
+	uint32_t n = pdata[19] - 1;
+	const uint32_t first_nonce = pdata[19];
+	const uint32_t Htarg = ptarget[7];
+	int i, j;
+	
+	memcpy(data, pdata + 16, 64);
+	sha256d_preextend(data);
+	/*
+	 * Replicate each of the 32 prepared words into 4 adjacent slots.
+	 * Walking from the highest index down keeps every source word
+	 * intact until it has been copied.
+	 */
+	for (i = 31; i >= 0; i--)
+		for (j = 0; j < 4; j++)
+			data[i * 4 + j] = data[i];
+	
+	sha256_init(midstate);
+	sha256_transform(midstate, pdata, 0);
+	memcpy(prehash, midstate, 32);
+	sha256d_prehash(prehash, pdata + 16);
+	/* Same 4-way replication for the two 8-word states. */
+	for (i = 7; i >= 0; i--) {
+		for (j = 0; j < 4; j++) {
+			midstate[i * 4 + j] = midstate[i];
+			prehash[i * 4 + j] = prehash[i];
+		}
+	}
+	
+	do {
+		/* Word 3 of each of the four copies gets its own nonce. */
+		for (i = 0; i < 4; i++)
+			data[4 * 3 + i] = ++n;
+		
+		sha256d_ms_4way(hash, data, midstate, prehash);
+		
+		for (i = 0; i < 4; i++) {
+			/* Quick check on word 7 of copy i before the full hash. */
+			if (swab32(hash[4 * 7 + i]) <= Htarg) {
+				pdata[19] = data[4 * 3 + i];
+				sha256d_80_swap(hash, pdata);
+				if (fulltest(hash, ptarget)) {
+					*hashes_done = n - first_nonce + 1;
+					return 1;
+				}
+			}
+		}
+	} while (n < max_nonce && !work_restart[thr_id].restart);
+	
+	*hashes_done = n - first_nonce + 1;
+	pdata[19] = n;
+	return 0;
+}
+
+#endif /* HAVE_SHA256_4WAY */
+
+#ifdef HAVE_SHA256_8WAY
+
+void sha256d_ms_8way(uint32_t *hash,  uint32_t *data,
+	const uint32_t *midstate, const uint32_t *prehash);
+
+/*
+ * Nonce scan that tests eight consecutive nonces per iteration through
+ * sha256d_ms_8way().
+ *
+ * pdata:       32-bit input words; pdata[19] is the first nonce to try
+ *              and is updated on return.
+ * ptarget:     target words; ptarget[7] is used for the quick pre-check.
+ * max_nonce:   scanning stops once the nonce counter reaches this value
+ *              (it may pass it by up to 7, nonces being taken in eights).
+ * hashes_done: receives the number of nonces consumed.
+ *
+ * Returns 1 with pdata[19] set to the winning nonce when fulltest()
+ * accepts a hash, otherwise 0 with pdata[19] set to the last nonce tried.
+ */
+static inline int scanhash_sha256d_8way(int thr_id, uint32_t *pdata,
+	const uint32_t *ptarget, uint32_t max_nonce, unsigned long *hashes_done)
+{
+	uint32_t data[8 * 64] __attribute__((aligned(128)));
+	uint32_t hash[8 * 8] __attribute__((aligned(32)));
+	uint32_t midstate[8 * 8] __attribute__((aligned(32)));
+	uint32_t prehash[8 * 8] __attribute__((aligned(32)));
+	uint32_t n = pdata[19] - 1;
+	const uint32_t first_nonce = pdata[19];
+	const uint32_t Htarg = ptarget[7];
+	int i, j;
+	
+	memcpy(data, pdata + 16, 64);
+	sha256d_preextend(data);
+	/*
+	 * Replicate each of the 32 prepared words into 8 adjacent slots.
+	 * Walking from the highest index down keeps every source word
+	 * intact until it has been copied.
+	 */
+	for (i = 31; i >= 0; i--)
+		for (j = 0; j < 8; j++)
+			data[i * 8 + j] = data[i];
+	
+	sha256_init(midstate);
+	sha256_transform(midstate, pdata, 0);
+	memcpy(prehash, midstate, 32);
+	sha256d_prehash(prehash, pdata + 16);
+	/* Same 8-way replication for the two 8-word states. */
+	for (i = 7; i >= 0; i--) {
+		for (j = 0; j < 8; j++) {
+			midstate[i * 8 + j] = midstate[i];
+			prehash[i * 8 + j] = prehash[i];
+		}
+	}
+	
+	do {
+		/* Word 3 of each of the eight copies gets its own nonce. */
+		for (i = 0; i < 8; i++)
+			data[8 * 3 + i] = ++n;
+		
+		sha256d_ms_8way(hash, data, midstate, prehash);
+		
+		for (i = 0; i < 8; i++) {
+			/* Quick check on word 7 of copy i before the full hash. */
+			if (swab32(hash[8 * 7 + i]) <= Htarg) {
+				pdata[19] = data[8 * 3 + i];
+				sha256d_80_swap(hash, pdata);
+				if (fulltest(hash, ptarget)) {
+					*hashes_done = n - first_nonce + 1;
+					return 1;
+				}
+			}
+		}
+	} while (n < max_nonce && !work_restart[thr_id].restart);
+	
+	*hashes_done = n - first_nonce + 1;
+	pdata[19] = n;
+	return 0;
+}
+
+#endif /* HAVE_SHA256_8WAY */
+
+/*
+ * Scans nonces for a double SHA-256 result that passes fulltest().
+ *
+ * Starts at pdata[19] and increments the nonce until it reaches
+ * max_nonce or work_restart[thr_id].restart becomes set. When built
+ * with HAVE_SHA256_8WAY / HAVE_SHA256_4WAY and the matching
+ * sha256_use_*way() check succeeds, the work is handed to the
+ * multi-way scanner instead.
+ *
+ * Returns 1 with pdata[19] holding the accepted nonce, or 0 with
+ * pdata[19] holding the last nonce tried. *hashes_done receives the
+ * number of nonces tried.
+ */
+int scanhash_sha256d(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
+	uint32_t max_nonce, unsigned long *hashes_done)
+{
+	uint32_t data[64] __attribute__((aligned(128)));
+	uint32_t hash[8] __attribute__((aligned(32)));
+	uint32_t midstate[8] __attribute__((aligned(32)));
+	uint32_t prehash[8] __attribute__((aligned(32)));
+	const uint32_t first_nonce = pdata[19];
+	const uint32_t Htarg = ptarget[7];
+	uint32_t nonce = first_nonce - 1;
+	int found = 0;
+
+#ifdef HAVE_SHA256_8WAY
+	if (sha256_use_8way())
+		return scanhash_sha256d_8way(thr_id, pdata, ptarget,
+			max_nonce, hashes_done);
+#endif
+#ifdef HAVE_SHA256_4WAY
+	if (sha256_use_4way())
+		return scanhash_sha256d_4way(thr_id, pdata, ptarget,
+			max_nonce, hashes_done);
+#endif
+
+	/* State after the first block, then after rounds 0..2 of the second. */
+	sha256_init(midstate);
+	sha256_transform(midstate, pdata, 0);
+	memcpy(prehash, midstate, 32);
+	sha256d_prehash(prehash, pdata + 16);
+
+	/* Schedule words of the second block that can be prepared once. */
+	memcpy(data, pdata + 16, 64);
+	sha256d_preextend(data);
+
+	do {
+		nonce++;
+		data[3] = nonce;
+		sha256d_ms(hash, data, midstate, prehash);
+
+		/* Cheap filter on word 7 before paying for the full hash. */
+		if (swab32(hash[7]) > Htarg)
+			continue;
+
+		pdata[19] = nonce;
+		sha256d_80_swap(hash, pdata);
+		if (fulltest(hash, ptarget)) {
+			found = 1;
+			break;
+		}
+	} while (nonce < max_nonce && !work_restart[thr_id].restart);
+
+	*hashes_done = nonce - first_nonce + 1;
+	pdata[19] = nonce;
+	return found;
+}
diff --git a/sph/sha2big.c b/sph/sha2big.c
new file mode 100644
index 0000000000..00a7e7f70d
--- /dev/null
+++ b/sph/sha2big.c
@@ -0,0 +1,256 @@
+/* $Id: sha2big.c 216 2010-06-08 09:46:57Z tp $ */
+/*
+ * SHA-384 / SHA-512 implementation.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#include <stddef.h>
+#include <string.h>
+
+#include "sph_sha2.h"
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+#if SPH_64
+
+#define CH(X, Y, Z)    ((((Y) ^ (Z)) & (X)) ^ (Z))
+#define MAJ(X, Y, Z)   (((X) & (Y)) | (((X) | (Y)) & (Z)))
+
+#define ROTR64    SPH_ROTR64
+
+#define BSG5_0(x)      (ROTR64(x, 28) ^ ROTR64(x, 34) ^ ROTR64(x, 39))
+#define BSG5_1(x)      (ROTR64(x, 14) ^ ROTR64(x, 18) ^ ROTR64(x, 41))
+#define SSG5_0(x)      (ROTR64(x, 1) ^ ROTR64(x, 8) ^ SPH_T64((x) >> 7))
+#define SSG5_1(x)      (ROTR64(x, 19) ^ ROTR64(x, 61) ^ SPH_T64((x) >> 6))
+
+static const sph_u64 K512[80] = {
+	SPH_C64(0x428A2F98D728AE22), SPH_C64(0x7137449123EF65CD),
+	SPH_C64(0xB5C0FBCFEC4D3B2F), SPH_C64(0xE9B5DBA58189DBBC),
+	SPH_C64(0x3956C25BF348B538), SPH_C64(0x59F111F1B605D019),
+	SPH_C64(0x923F82A4AF194F9B), SPH_C64(0xAB1C5ED5DA6D8118),
+	SPH_C64(0xD807AA98A3030242), SPH_C64(0x12835B0145706FBE),
+	SPH_C64(0x243185BE4EE4B28C), SPH_C64(0x550C7DC3D5FFB4E2),
+	SPH_C64(0x72BE5D74F27B896F), SPH_C64(0x80DEB1FE3B1696B1),
+	SPH_C64(0x9BDC06A725C71235), SPH_C64(0xC19BF174CF692694),
+	SPH_C64(0xE49B69C19EF14AD2), SPH_C64(0xEFBE4786384F25E3),
+	SPH_C64(0x0FC19DC68B8CD5B5), SPH_C64(0x240CA1CC77AC9C65),
+	SPH_C64(0x2DE92C6F592B0275), SPH_C64(0x4A7484AA6EA6E483),
+	SPH_C64(0x5CB0A9DCBD41FBD4), SPH_C64(0x76F988DA831153B5),
+	SPH_C64(0x983E5152EE66DFAB), SPH_C64(0xA831C66D2DB43210),
+	SPH_C64(0xB00327C898FB213F), SPH_C64(0xBF597FC7BEEF0EE4),
+	SPH_C64(0xC6E00BF33DA88FC2), SPH_C64(0xD5A79147930AA725),
+	SPH_C64(0x06CA6351E003826F), SPH_C64(0x142929670A0E6E70),
+	SPH_C64(0x27B70A8546D22FFC), SPH_C64(0x2E1B21385C26C926),
+	SPH_C64(0x4D2C6DFC5AC42AED), SPH_C64(0x53380D139D95B3DF),
+	SPH_C64(0x650A73548BAF63DE), SPH_C64(0x766A0ABB3C77B2A8),
+	SPH_C64(0x81C2C92E47EDAEE6), SPH_C64(0x92722C851482353B),
+	SPH_C64(0xA2BFE8A14CF10364), SPH_C64(0xA81A664BBC423001),
+	SPH_C64(0xC24B8B70D0F89791), SPH_C64(0xC76C51A30654BE30),
+	SPH_C64(0xD192E819D6EF5218), SPH_C64(0xD69906245565A910),
+	SPH_C64(0xF40E35855771202A), SPH_C64(0x106AA07032BBD1B8),
+	SPH_C64(0x19A4C116B8D2D0C8), SPH_C64(0x1E376C085141AB53),
+	SPH_C64(0x2748774CDF8EEB99), SPH_C64(0x34B0BCB5E19B48A8),
+	SPH_C64(0x391C0CB3C5C95A63), SPH_C64(0x4ED8AA4AE3418ACB),
+	SPH_C64(0x5B9CCA4F7763E373), SPH_C64(0x682E6FF3D6B2B8A3),
+	SPH_C64(0x748F82EE5DEFB2FC), SPH_C64(0x78A5636F43172F60),
+	SPH_C64(0x84C87814A1F0AB72), SPH_C64(0x8CC702081A6439EC),
+	SPH_C64(0x90BEFFFA23631E28), SPH_C64(0xA4506CEBDE82BDE9),
+	SPH_C64(0xBEF9A3F7B2C67915), SPH_C64(0xC67178F2E372532B),
+	SPH_C64(0xCA273ECEEA26619C), SPH_C64(0xD186B8C721C0C207),
+	SPH_C64(0xEADA7DD6CDE0EB1E), SPH_C64(0xF57D4F7FEE6ED178),
+	SPH_C64(0x06F067AA72176FBA), SPH_C64(0x0A637DC5A2C898A6),
+	SPH_C64(0x113F9804BEF90DAE), SPH_C64(0x1B710B35131C471B),
+	SPH_C64(0x28DB77F523047D84), SPH_C64(0x32CAAB7B40C72493),
+	SPH_C64(0x3C9EBE0A15C9BEBC), SPH_C64(0x431D67C49C100D4C),
+	SPH_C64(0x4CC5D4BECB3E42B6), SPH_C64(0x597F299CFC657E2A),
+	SPH_C64(0x5FCB6FAB3AD6FAEC), SPH_C64(0x6C44198C4A475817)
+};
+
+static const sph_u64 H384[8] = {
+	SPH_C64(0xCBBB9D5DC1059ED8), SPH_C64(0x629A292A367CD507),
+	SPH_C64(0x9159015A3070DD17), SPH_C64(0x152FECD8F70E5939),
+	SPH_C64(0x67332667FFC00B31), SPH_C64(0x8EB44A8768581511),
+	SPH_C64(0xDB0C2E0D64F98FA7), SPH_C64(0x47B5481DBEFA4FA4)
+};
+
+static const sph_u64 H512[8] = {
+	SPH_C64(0x6A09E667F3BCC908), SPH_C64(0xBB67AE8584CAA73B),
+	SPH_C64(0x3C6EF372FE94F82B), SPH_C64(0xA54FF53A5F1D36F1),
+	SPH_C64(0x510E527FADE682D1), SPH_C64(0x9B05688C2B3E6C1F),
+	SPH_C64(0x1F83D9ABFB41BD6B), SPH_C64(0x5BE0CD19137E2179)
+};
+
+/*
+ * This macro defines the body for a SHA-384 / SHA-512 compression function
+ * implementation. The "in" parameter should evaluate, when applied to a
+ * numerical input parameter from 0 to 15, to an expression which yields
+ * the corresponding input block. The "r" parameter should evaluate to
+ * an array or pointer expression designating the array of 8 words which
+ * contains the input and output of the compression function.
+ *
+ * SHA-512 is hard for the compiler. If the loop is completely unrolled,
+ * then the code will be quite huge (possibly more than 100 kB), and the
+ * performance will be degraded due to cache misses on the code. We
+ * unroll only eight steps, which avoids all needless copies when
+ * 64-bit registers are swapped.
+ */
+
+/*
+ * One SHA-384 / SHA-512 step. Reads K512[i] and W[i] (W must exist in
+ * the expanding scope) and updates D and H in place; the other six
+ * arguments are only read. The caller rotates the argument order from
+ * step to step instead of moving values between variables.
+ */
+#define SHA3_STEP(A, B, C, D, E, F, G, H, i)   do { \
+		sph_u64 T1, T2; \
+		T1 = SPH_T64(H + BSG5_1(E) + CH(E, F, G) + K512[i] + W[i]); \
+		T2 = SPH_T64(BSG5_0(A) + MAJ(A, B, C)); \
+		D = SPH_T64(D + T1); \
+		H = SPH_T64(T1 + T2); \
+	} while (0)
+
+/*
+ * Body of the compression function: builds the 80-word schedule W from
+ * in(0)..in(15), runs the 80 steps eight at a time, then adds the
+ * working variables back into (r)[0..7].
+ */
+#define SHA3_ROUND_BODY(in, r)   do { \
+		sph_u64 W[80]; \
+		sph_u64 A, B, C, D, E, F, G, H; \
+		int i; \
+ \
+		for (i = 0; i < 80; i ++) { \
+			if (i < 16) \
+				W[i] = in(i); \
+			else \
+				W[i] = SPH_T64(SSG5_1(W[i - 2]) + W[i - 7] \
+					+ SSG5_0(W[i - 15]) + W[i - 16]); \
+		} \
+ \
+		A = (r)[0]; \
+		B = (r)[1]; \
+		C = (r)[2]; \
+		D = (r)[3]; \
+		E = (r)[4]; \
+		F = (r)[5]; \
+		G = (r)[6]; \
+		H = (r)[7]; \
+ \
+		for (i = 0; i < 80; i += 8) { \
+			SHA3_STEP(A, B, C, D, E, F, G, H, i + 0); \
+			SHA3_STEP(H, A, B, C, D, E, F, G, i + 1); \
+			SHA3_STEP(G, H, A, B, C, D, E, F, i + 2); \
+			SHA3_STEP(F, G, H, A, B, C, D, E, i + 3); \
+			SHA3_STEP(E, F, G, H, A, B, C, D, i + 4); \
+			SHA3_STEP(D, E, F, G, H, A, B, C, i + 5); \
+			SHA3_STEP(C, D, E, F, G, H, A, B, i + 6); \
+			SHA3_STEP(B, C, D, E, F, G, H, A, i + 7); \
+		} \
+ \
+		(r)[0] = SPH_T64((r)[0] + A); \
+		(r)[1] = SPH_T64((r)[1] + B); \
+		(r)[2] = SPH_T64((r)[2] + C); \
+		(r)[3] = SPH_T64((r)[3] + D); \
+		(r)[4] = SPH_T64((r)[4] + E); \
+		(r)[5] = SPH_T64((r)[5] + F); \
+		(r)[6] = SPH_T64((r)[6] + G); \
+		(r)[7] = SPH_T64((r)[7] + H); \
+	} while (0)
+
+/*
+ * One round of SHA-384 / SHA-512. The data must be aligned for 64-bit access.
+ *
+ * "data" is one 128-byte block, read as sixteen 64-bit words through
+ * sph_dec64be_aligned(); "r" is the eight-word state, updated in place.
+ */
+static void
+sha3_round(const unsigned char *data, sph_u64 r[8])
+{
+#define SHA3_IN(x)   sph_dec64be_aligned(data + (8 * (x)))
+	SHA3_ROUND_BODY(SHA3_IN, r);
+#undef SHA3_IN
+}
+
+/*
+ * see sph_sha2.h
+ *
+ * Prepares the context for a new SHA-384 computation: sc->val is loaded
+ * from H384 and sc->count is cleared.
+ */
+void
+sph_sha384_init(void *cc)
+{
+	sph_sha384_context *sc;
+
+	/*
+	 * Explicit cast: this file carries extern "C" guards for C++
+	 * compilers, and C++ has no implicit conversion from void *.
+	 */
+	sc = (sph_sha384_context *)cc;
+	memcpy(sc->val, H384, sizeof H384);
+	sc->count = 0;
+}
+
+/*
+ * see sph_sha2.h
+ *
+ * Prepares the context for a new SHA-512 computation: sc->val is loaded
+ * from H512 and sc->count is cleared.
+ */
+void
+sph_sha512_init(void *cc)
+{
+	sph_sha512_context *sc;
+
+	/*
+	 * Explicit cast: this file carries extern "C" guards for C++
+	 * compilers, and C++ has no implicit conversion from void *.
+	 */
+	sc = (sph_sha512_context *)cc;
+	memcpy(sc->val, H512, sizeof H512);
+	sc->count = 0;
+}
+
+#define RFUN   sha3_round
+#define HASH   sha384
+#define BE64   1
+#include "md_helper.c"
+
+/*
+ * see sph_sha2.h
+ *
+ * Finishes a SHA-384 computation through sha384_close(), writing to
+ * dst, then resets the context with sph_sha384_init().
+ */
+void
+sph_sha384_close(void *cc, void *dst)
+{
+	/* NOTE(review): 6 is presumably the output size in 64-bit words. */
+	sha384_close(cc, dst, 6);
+	sph_sha384_init(cc);
+}
+
+/*
+ * see sph_sha2.h
+ *
+ * Same as sph_sha384_close() but goes through
+ * sha384_addbits_and_close(), forwarding ub and n unchanged.
+ * NOTE(review): ub / n presumably carry extra trailing bits and their
+ * count -- confirm in sph_sha2.h.
+ */
+void
+sph_sha384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+	sha384_addbits_and_close(cc, ub, n, dst, 6);
+	sph_sha384_init(cc);
+}
+
+/*
+ * see sph_sha2.h
+ *
+ * Finishes a SHA-512 computation, writing to dst, then resets the
+ * context with sph_sha512_init().
+ */
+void
+sph_sha512_close(void *cc, void *dst)
+{
+	/*
+	 * Calling sha384_close() here is intentional as far as this file
+	 * shows: md_helper.c is included once with HASH set to sha384, so
+	 * no sha512_close() exists. NOTE(review): presumably only the last
+	 * argument (8 vs 6 output words) separates the two -- confirm.
+	 */
+	sha384_close(cc, dst, 8);
+	sph_sha512_init(cc);
+}
+
+/*
+ * see sph_sha2.h
+ *
+ * Same as sph_sha512_close() but goes through
+ * sha384_addbits_and_close(), forwarding ub and n unchanged. The
+ * sha384_ helper is shared with SHA-384; only the last argument
+ * (8 here) differs.
+ */
+void
+sph_sha512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+	sha384_addbits_and_close(cc, ub, n, dst, 8);
+	sph_sha512_init(cc);
+}
+
+/*
+ * see sph_sha2.h
+ *
+ * Applies the compression function directly to the sixteen 64-bit
+ * words of "msg" (no decoding step); the eight words of "val" are
+ * updated in place.
+ */
+void
+sph_sha384_comp(const sph_u64 msg[16], sph_u64 val[8])
+{
+#define SHA3_IN(x)   msg[x]
+	SHA3_ROUND_BODY(SHA3_IN, val);
+#undef SHA3_IN
+}
+
+#endif
+#ifdef __cplusplus
+}
+#endif
+
diff --git a/sph/shabal.c b/sph/shabal.c
new file mode 100644
index 0000000000..4f5162140f
--- /dev/null
+++ b/sph/shabal.c
@@ -0,0 +1,799 @@
+/* $Id: shabal.c 175 2010-05-07 16:03:20Z tp $ */
+/*
+ * Shabal implementation.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#include <stddef.h>
+#include <string.h>
+
+#include "sph_shabal.h"
+
+#ifdef _MSC_VER
+#pragma warning (disable: 4146)
+#endif
+
+/*
+ * Part of this code was automatically generated (the part between
+ * the "BEGIN" and "END" markers).
+ */
+
+#define sM    16
+
+#define C32   SPH_C32
+#define T32   SPH_T32
+
+#define O1   13
+#define O2    9
+#define O3    6
+
+/*
+ * We copy the state into local variables, so that the compiler knows
+ * that it can optimize them at will.
+ */
+
+/* BEGIN -- automatically generated code. */
+
+#define DECL_STATE   \
+	sph_u32 A00, A01, A02, A03, A04, A05, A06, A07, \
+	        A08, A09, A0A, A0B; \
+	sph_u32 B0, B1, B2, B3, B4, B5, B6, B7, \
+	        B8, B9, BA, BB, BC, BD, BE, BF; \
+	sph_u32 C0, C1, C2, C3, C4, C5, C6, C7, \
+	        C8, C9, CA, CB, CC, CD, CE, CF; \
+	sph_u32 M0, M1, M2, M3, M4, M5, M6, M7, \
+	        M8, M9, MA, MB, MC, MD, ME, MF; \
+	sph_u32 Wlow, Whigh;
+
+#define READ_STATE(state)   do { \
+		A00 = (state)->A[0]; \
+		A01 = (state)->A[1]; \
+		A02 = (state)->A[2]; \
+		A03 = (state)->A[3]; \
+		A04 = (state)->A[4]; \
+		A05 = (state)->A[5]; \
+		A06 = (state)->A[6]; \
+		A07 = (state)->A[7]; \
+		A08 = (state)->A[8]; \
+		A09 = (state)->A[9]; \
+		A0A = (state)->A[10]; \
+		A0B = (state)->A[11]; \
+		B0 = (state)->B[0]; \
+		B1 = (state)->B[1]; \
+		B2 = (state)->B[2]; \
+		B3 = (state)->B[3]; \
+		B4 = (state)->B[4]; \
+		B5 = (state)->B[5]; \
+		B6 = (state)->B[6]; \
+		B7 = (state)->B[7]; \
+		B8 = (state)->B[8]; \
+		B9 = (state)->B[9]; \
+		BA = (state)->B[10]; \
+		BB = (state)->B[11]; \
+		BC = (state)->B[12]; \
+		BD = (state)->B[13]; \
+		BE = (state)->B[14]; \
+		BF = (state)->B[15]; \
+		C0 = (state)->C[0]; \
+		C1 = (state)->C[1]; \
+		C2 = (state)->C[2]; \
+		C3 = (state)->C[3]; \
+		C4 = (state)->C[4]; \
+		C5 = (state)->C[5]; \
+		C6 = (state)->C[6]; \
+		C7 = (state)->C[7]; \
+		C8 = (state)->C[8]; \
+		C9 = (state)->C[9]; \
+		CA = (state)->C[10]; \
+		CB = (state)->C[11]; \
+		CC = (state)->C[12]; \
+		CD = (state)->C[13]; \
+		CE = (state)->C[14]; \
+		CF = (state)->C[15]; \
+		Wlow = (state)->Wlow; \
+		Whigh = (state)->Whigh; \
+	} while (0)
+
+#define WRITE_STATE(state)   do { \
+		(state)->A[0] = A00; \
+		(state)->A[1] = A01; \
+		(state)->A[2] = A02; \
+		(state)->A[3] = A03; \
+		(state)->A[4] = A04; \
+		(state)->A[5] = A05; \
+		(state)->A[6] = A06; \
+		(state)->A[7] = A07; \
+		(state)->A[8] = A08; \
+		(state)->A[9] = A09; \
+		(state)->A[10] = A0A; \
+		(state)->A[11] = A0B; \
+		(state)->B[0] = B0; \
+		(state)->B[1] = B1; \
+		(state)->B[2] = B2; \
+		(state)->B[3] = B3; \
+		(state)->B[4] = B4; \
+		(state)->B[5] = B5; \
+		(state)->B[6] = B6; \
+		(state)->B[7] = B7; \
+		(state)->B[8] = B8; \
+		(state)->B[9] = B9; \
+		(state)->B[10] = BA; \
+		(state)->B[11] = BB; \
+		(state)->B[12] = BC; \
+		(state)->B[13] = BD; \
+		(state)->B[14] = BE; \
+		(state)->B[15] = BF; \
+		(state)->C[0] = C0; \
+		(state)->C[1] = C1; \
+		(state)->C[2] = C2; \
+		(state)->C[3] = C3; \
+		(state)->C[4] = C4; \
+		(state)->C[5] = C5; \
+		(state)->C[6] = C6; \
+		(state)->C[7] = C7; \
+		(state)->C[8] = C8; \
+		(state)->C[9] = C9; \
+		(state)->C[10] = CA; \
+		(state)->C[11] = CB; \
+		(state)->C[12] = CC; \
+		(state)->C[13] = CD; \
+		(state)->C[14] = CE; \
+		(state)->C[15] = CF; \
+		(state)->Wlow = Wlow; \
+		(state)->Whigh = Whigh; \
+	} while (0)
+
+#define DECODE_BLOCK   do { \
+		M0 = sph_dec32le_aligned(buf + 0); \
+		M1 = sph_dec32le_aligned(buf + 4); \
+		M2 = sph_dec32le_aligned(buf + 8); \
+		M3 = sph_dec32le_aligned(buf + 12); \
+		M4 = sph_dec32le_aligned(buf + 16); \
+		M5 = sph_dec32le_aligned(buf + 20); \
+		M6 = sph_dec32le_aligned(buf + 24); \
+		M7 = sph_dec32le_aligned(buf + 28); \
+		M8 = sph_dec32le_aligned(buf + 32); \
+		M9 = sph_dec32le_aligned(buf + 36); \
+		MA = sph_dec32le_aligned(buf + 40); \
+		MB = sph_dec32le_aligned(buf + 44); \
+		MC = sph_dec32le_aligned(buf + 48); \
+		MD = sph_dec32le_aligned(buf + 52); \
+		ME = sph_dec32le_aligned(buf + 56); \
+		MF = sph_dec32le_aligned(buf + 60); \
+	} while (0)
+
+#define INPUT_BLOCK_ADD   do { \
+		B0 = T32(B0 + M0); \
+		B1 = T32(B1 + M1); \
+		B2 = T32(B2 + M2); \
+		B3 = T32(B3 + M3); \
+		B4 = T32(B4 + M4); \
+		B5 = T32(B5 + M5); \
+		B6 = T32(B6 + M6); \
+		B7 = T32(B7 + M7); \
+		B8 = T32(B8 + M8); \
+		B9 = T32(B9 + M9); \
+		BA = T32(BA + MA); \
+		BB = T32(BB + MB); \
+		BC = T32(BC + MC); \
+		BD = T32(BD + MD); \
+		BE = T32(BE + ME); \
+		BF = T32(BF + MF); \
+	} while (0)
+
+#define INPUT_BLOCK_SUB   do { \
+		C0 = T32(C0 - M0); \
+		C1 = T32(C1 - M1); \
+		C2 = T32(C2 - M2); \
+		C3 = T32(C3 - M3); \
+		C4 = T32(C4 - M4); \
+		C5 = T32(C5 - M5); \
+		C6 = T32(C6 - M6); \
+		C7 = T32(C7 - M7); \
+		C8 = T32(C8 - M8); \
+		C9 = T32(C9 - M9); \
+		CA = T32(CA - MA); \
+		CB = T32(CB - MB); \
+		CC = T32(CC - MC); \
+		CD = T32(CD - MD); \
+		CE = T32(CE - ME); \
+		CF = T32(CF - MF); \
+	} while (0)
+
+#define XOR_W   do { \
+		A00 ^= Wlow; \
+		A01 ^= Whigh; \
+	} while (0)
+
+#define SWAP(v1, v2)   do { \
+		sph_u32 tmp = (v1); \
+		(v1) = (v2); \
+		(v2) = tmp; \
+	} while (0)
+
+#define SWAP_BC   do { \
+		SWAP(B0, C0); \
+		SWAP(B1, C1); \
+		SWAP(B2, C2); \
+		SWAP(B3, C3); \
+		SWAP(B4, C4); \
+		SWAP(B5, C5); \
+		SWAP(B6, C6); \
+		SWAP(B7, C7); \
+		SWAP(B8, C8); \
+		SWAP(B9, C9); \
+		SWAP(BA, CA); \
+		SWAP(BB, CB); \
+		SWAP(BC, CC); \
+		SWAP(BD, CD); \
+		SWAP(BE, CE); \
+		SWAP(BF, CF); \
+	} while (0)
+
+#define PERM_ELT(xa0, xa1, xb0, xb1, xb2, xb3, xc, xm)   do { \
+		xa0 = T32((xa0 \
+			^ (((xa1 << 15) | (xa1 >> 17)) * 5U) \
+			^ xc) * 3U) \
+			^ xb1 ^ (xb2 & ~xb3) ^ xm; \
+		xb0 = T32(~(((xb0 << 1) | (xb0 >> 31)) ^ xa0)); \
+	} while (0)
+
+#define PERM_STEP_0   do { \
+		PERM_ELT(A00, A0B, B0, BD, B9, B6, C8, M0); \
+		PERM_ELT(A01, A00, B1, BE, BA, B7, C7, M1); \
+		PERM_ELT(A02, A01, B2, BF, BB, B8, C6, M2); \
+		PERM_ELT(A03, A02, B3, B0, BC, B9, C5, M3); \
+		PERM_ELT(A04, A03, B4, B1, BD, BA, C4, M4); \
+		PERM_ELT(A05, A04, B5, B2, BE, BB, C3, M5); \
+		PERM_ELT(A06, A05, B6, B3, BF, BC, C2, M6); \
+		PERM_ELT(A07, A06, B7, B4, B0, BD, C1, M7); \
+		PERM_ELT(A08, A07, B8, B5, B1, BE, C0, M8); \
+		PERM_ELT(A09, A08, B9, B6, B2, BF, CF, M9); \
+		PERM_ELT(A0A, A09, BA, B7, B3, B0, CE, MA); \
+		PERM_ELT(A0B, A0A, BB, B8, B4, B1, CD, MB); \
+		PERM_ELT(A00, A0B, BC, B9, B5, B2, CC, MC); \
+		PERM_ELT(A01, A00, BD, BA, B6, B3, CB, MD); \
+		PERM_ELT(A02, A01, BE, BB, B7, B4, CA, ME); \
+		PERM_ELT(A03, A02, BF, BC, B8, B5, C9, MF); \
+	} while (0)
+
+#define PERM_STEP_1   do { \
+		PERM_ELT(A04, A03, B0, BD, B9, B6, C8, M0); \
+		PERM_ELT(A05, A04, B1, BE, BA, B7, C7, M1); \
+		PERM_ELT(A06, A05, B2, BF, BB, B8, C6, M2); \
+		PERM_ELT(A07, A06, B3, B0, BC, B9, C5, M3); \
+		PERM_ELT(A08, A07, B4, B1, BD, BA, C4, M4); \
+		PERM_ELT(A09, A08, B5, B2, BE, BB, C3, M5); \
+		PERM_ELT(A0A, A09, B6, B3, BF, BC, C2, M6); \
+		PERM_ELT(A0B, A0A, B7, B4, B0, BD, C1, M7); \
+		PERM_ELT(A00, A0B, B8, B5, B1, BE, C0, M8); \
+		PERM_ELT(A01, A00, B9, B6, B2, BF, CF, M9); \
+		PERM_ELT(A02, A01, BA, B7, B3, B0, CE, MA); \
+		PERM_ELT(A03, A02, BB, B8, B4, B1, CD, MB); \
+		PERM_ELT(A04, A03, BC, B9, B5, B2, CC, MC); \
+		PERM_ELT(A05, A04, BD, BA, B6, B3, CB, MD); \
+		PERM_ELT(A06, A05, BE, BB, B7, B4, CA, ME); \
+		PERM_ELT(A07, A06, BF, BC, B8, B5, C9, MF); \
+	} while (0)
+
+#define PERM_STEP_2   do { \
+		PERM_ELT(A08, A07, B0, BD, B9, B6, C8, M0); \
+		PERM_ELT(A09, A08, B1, BE, BA, B7, C7, M1); \
+		PERM_ELT(A0A, A09, B2, BF, BB, B8, C6, M2); \
+		PERM_ELT(A0B, A0A, B3, B0, BC, B9, C5, M3); \
+		PERM_ELT(A00, A0B, B4, B1, BD, BA, C4, M4); \
+		PERM_ELT(A01, A00, B5, B2, BE, BB, C3, M5); \
+		PERM_ELT(A02, A01, B6, B3, BF, BC, C2, M6); \
+		PERM_ELT(A03, A02, B7, B4, B0, BD, C1, M7); \
+		PERM_ELT(A04, A03, B8, B5, B1, BE, C0, M8); \
+		PERM_ELT(A05, A04, B9, B6, B2, BF, CF, M9); \
+		PERM_ELT(A06, A05, BA, B7, B3, B0, CE, MA); \
+		PERM_ELT(A07, A06, BB, B8, B4, B1, CD, MB); \
+		PERM_ELT(A08, A07, BC, B9, B5, B2, CC, MC); \
+		PERM_ELT(A09, A08, BD, BA, B6, B3, CB, MD); \
+		PERM_ELT(A0A, A09, BE, BB, B7, B4, CA, ME); \
+		PERM_ELT(A0B, A0A, BF, BC, B8, B5, C9, MF); \
+	} while (0)
+
+#define APPLY_P   do { \
+		B0 = T32(B0 << 17) | (B0 >> 15); \
+		B1 = T32(B1 << 17) | (B1 >> 15); \
+		B2 = T32(B2 << 17) | (B2 >> 15); \
+		B3 = T32(B3 << 17) | (B3 >> 15); \
+		B4 = T32(B4 << 17) | (B4 >> 15); \
+		B5 = T32(B5 << 17) | (B5 >> 15); \
+		B6 = T32(B6 << 17) | (B6 >> 15); \
+		B7 = T32(B7 << 17) | (B7 >> 15); \
+		B8 = T32(B8 << 17) | (B8 >> 15); \
+		B9 = T32(B9 << 17) | (B9 >> 15); \
+		BA = T32(BA << 17) | (BA >> 15); \
+		BB = T32(BB << 17) | (BB >> 15); \
+		BC = T32(BC << 17) | (BC >> 15); \
+		BD = T32(BD << 17) | (BD >> 15); \
+		BE = T32(BE << 17) | (BE >> 15); \
+		BF = T32(BF << 17) | (BF >> 15); \
+		PERM_STEP_0; \
+		PERM_STEP_1; \
+		PERM_STEP_2; \
+		A0B = T32(A0B + C6); \
+		A0A = T32(A0A + C5); \
+		A09 = T32(A09 + C4); \
+		A08 = T32(A08 + C3); \
+		A07 = T32(A07 + C2); \
+		A06 = T32(A06 + C1); \
+		A05 = T32(A05 + C0); \
+		A04 = T32(A04 + CF); \
+		A03 = T32(A03 + CE); \
+		A02 = T32(A02 + CD); \
+		A01 = T32(A01 + CC); \
+		A00 = T32(A00 + CB); \
+		A0B = T32(A0B + CA); \
+		A0A = T32(A0A + C9); \
+		A09 = T32(A09 + C8); \
+		A08 = T32(A08 + C7); \
+		A07 = T32(A07 + C6); \
+		A06 = T32(A06 + C5); \
+		A05 = T32(A05 + C4); \
+		A04 = T32(A04 + C3); \
+		A03 = T32(A03 + C2); \
+		A02 = T32(A02 + C1); \
+		A01 = T32(A01 + C0); \
+		A00 = T32(A00 + CF); \
+		A0B = T32(A0B + CE); \
+		A0A = T32(A0A + CD); \
+		A09 = T32(A09 + CC); \
+		A08 = T32(A08 + CB); \
+		A07 = T32(A07 + CA); \
+		A06 = T32(A06 + C9); \
+		A05 = T32(A05 + C8); \
+		A04 = T32(A04 + C7); \
+		A03 = T32(A03 + C6); \
+		A02 = T32(A02 + C5); \
+		A01 = T32(A01 + C4); \
+		A00 = T32(A00 + C3); \
+	} while (0)
+
+#define INCR_W   do { \
+		if ((Wlow = T32(Wlow + 1)) == 0) \
+			Whigh = T32(Whigh + 1); \
+	} while (0)
+
+static const sph_u32 A_init_192[] = {
+	C32(0xFD749ED4), C32(0xB798E530), C32(0x33904B6F), C32(0x46BDA85E),
+	C32(0x076934B4), C32(0x454B4058), C32(0x77F74527), C32(0xFB4CF465),
+	C32(0x62931DA9), C32(0xE778C8DB), C32(0x22B3998E), C32(0xAC15CFB9)
+};
+
+static const sph_u32 B_init_192[] = {
+	C32(0x58BCBAC4), C32(0xEC47A08E), C32(0xAEE933B2), C32(0xDFCBC824),
+	C32(0xA7944804), C32(0xBF65BDB0), C32(0x5A9D4502), C32(0x59979AF7),
+	C32(0xC5CEA54E), C32(0x4B6B8150), C32(0x16E71909), C32(0x7D632319),
+	C32(0x930573A0), C32(0xF34C63D1), C32(0xCAF914B4), C32(0xFDD6612C)
+};
+
+static const sph_u32 C_init_192[] = {
+	C32(0x61550878), C32(0x89EF2B75), C32(0xA1660C46), C32(0x7EF3855B),
+	C32(0x7297B58C), C32(0x1BC67793), C32(0x7FB1C723), C32(0xB66FC640),
+	C32(0x1A48B71C), C32(0xF0976D17), C32(0x088CE80A), C32(0xA454EDF3),
+	C32(0x1C096BF4), C32(0xAC76224B), C32(0x5215781C), C32(0xCD5D2669)
+};
+
+static const sph_u32 A_init_224[] = {
+	C32(0xA5201467), C32(0xA9B8D94A), C32(0xD4CED997), C32(0x68379D7B),
+	C32(0xA7FC73BA), C32(0xF1A2546B), C32(0x606782BF), C32(0xE0BCFD0F),
+	C32(0x2F25374E), C32(0x069A149F), C32(0x5E2DFF25), C32(0xFAECF061)
+};
+
+static const sph_u32 B_init_224[] = {
+	C32(0xEC9905D8), C32(0xF21850CF), C32(0xC0A746C8), C32(0x21DAD498),
+	C32(0x35156EEB), C32(0x088C97F2), C32(0x26303E40), C32(0x8A2D4FB5),
+	C32(0xFEEE44B6), C32(0x8A1E9573), C32(0x7B81111A), C32(0xCBC139F0),
+	C32(0xA3513861), C32(0x1D2C362E), C32(0x918C580E), C32(0xB58E1B9C)
+};
+
+static const sph_u32 C_init_224[] = {
+	C32(0xE4B573A1), C32(0x4C1A0880), C32(0x1E907C51), C32(0x04807EFD),
+	C32(0x3AD8CDE5), C32(0x16B21302), C32(0x02512C53), C32(0x2204CB18),
+	C32(0x99405F2D), C32(0xE5B648A1), C32(0x70AB1D43), C32(0xA10C25C2),
+	C32(0x16F1AC05), C32(0x38BBEB56), C32(0x9B01DC60), C32(0xB1096D83)
+};
+
+static const sph_u32 A_init_256[] = {
+	C32(0x52F84552), C32(0xE54B7999), C32(0x2D8EE3EC), C32(0xB9645191),
+	C32(0xE0078B86), C32(0xBB7C44C9), C32(0xD2B5C1CA), C32(0xB0D2EB8C),
+	C32(0x14CE5A45), C32(0x22AF50DC), C32(0xEFFDBC6B), C32(0xEB21B74A)
+};
+
+static const sph_u32 B_init_256[] = {
+	C32(0xB555C6EE), C32(0x3E710596), C32(0xA72A652F), C32(0x9301515F),
+	C32(0xDA28C1FA), C32(0x696FD868), C32(0x9CB6BF72), C32(0x0AFE4002),
+	C32(0xA6E03615), C32(0x5138C1D4), C32(0xBE216306), C32(0xB38B8890),
+	C32(0x3EA8B96B), C32(0x3299ACE4), C32(0x30924DD4), C32(0x55CB34A5)
+};
+
+static const sph_u32 C_init_256[] = {
+	C32(0xB405F031), C32(0xC4233EBA), C32(0xB3733979), C32(0xC0DD9D55),
+	C32(0xC51C28AE), C32(0xA327B8E1), C32(0x56C56167), C32(0xED614433),
+	C32(0x88B59D60), C32(0x60E2CEBA), C32(0x758B4B8B), C32(0x83E82A7F),
+	C32(0xBC968828), C32(0xE6E00BF7), C32(0xBA839E55), C32(0x9B491C60)
+};
+
+static const sph_u32 A_init_384[] = {
+	C32(0xC8FCA331), C32(0xE55C504E), C32(0x003EBF26), C32(0xBB6B8D83),
+	C32(0x7B0448C1), C32(0x41B82789), C32(0x0A7C9601), C32(0x8D659CFF),
+	C32(0xB6E2673E), C32(0xCA54C77B), C32(0x1460FD7E), C32(0x3FCB8F2D)
+};
+
+static const sph_u32 B_init_384[] = {
+	C32(0x527291FC), C32(0x2A16455F), C32(0x78E627E5), C32(0x944F169F),
+	C32(0x1CA6F016), C32(0xA854EA25), C32(0x8DB98ABE), C32(0xF2C62641),
+	C32(0x30117DCB), C32(0xCF5C4309), C32(0x93711A25), C32(0xF9F671B8),
+	C32(0xB01D2116), C32(0x333F4B89), C32(0xB285D165), C32(0x86829B36)
+};
+
+static const sph_u32 C_init_384[] = {
+	C32(0xF764B11A), C32(0x76172146), C32(0xCEF6934D), C32(0xC6D28399),
+	C32(0xFE095F61), C32(0x5E6018B4), C32(0x5048ECF5), C32(0x51353261),
+	C32(0x6E6E36DC), C32(0x63130DAD), C32(0xA9C69BD6), C32(0x1E90EA0C),
+	C32(0x7C35073B), C32(0x28D95E6D), C32(0xAA340E0D), C32(0xCB3DEE70)
+};
+
+static const sph_u32 A_init_512[] = {
+	C32(0x20728DFD), C32(0x46C0BD53), C32(0xE782B699), C32(0x55304632),
+	C32(0x71B4EF90), C32(0x0EA9E82C), C32(0xDBB930F1), C32(0xFAD06B8B),
+	C32(0xBE0CAE40), C32(0x8BD14410), C32(0x76D2ADAC), C32(0x28ACAB7F)
+};
+
+static const sph_u32 B_init_512[] = {
+	C32(0xC1099CB7), C32(0x07B385F3), C32(0xE7442C26), C32(0xCC8AD640),
+	C32(0xEB6F56C7), C32(0x1EA81AA9), C32(0x73B9D314), C32(0x1DE85D08),
+	C32(0x48910A5A), C32(0x893B22DB), C32(0xC5A0DF44), C32(0xBBC4324E),
+	C32(0x72D2F240), C32(0x75941D99), C32(0x6D8BDE82), C32(0xA1A7502B)
+};
+
+static const sph_u32 C_init_512[] = {
+	C32(0xD9BF68D1), C32(0x58BAD750), C32(0x56028CB2), C32(0x8134F359),
+	C32(0xB5D469D8), C32(0x941A8CC2), C32(0x418B2A6E), C32(0x04052780),
+	C32(0x7F07D787), C32(0x5194358F), C32(0x3C60D665), C32(0xBE97D79A),
+	C32(0x950C3434), C32(0xAED9A06D), C32(0x2537DC8D), C32(0x7CDB5969)
+};
+
+/* END -- automatically generated code. */
+
+static void
+shabal_init(void *cc, unsigned size)
+{
+	/*
+	 * Load the precomputed initial A/B/C state matching the requested
+	 * output length "size" (in bits) into the context. Any length
+	 * other than 192, 224, 256, 384 or 512 leaves the context untouched.
+	 */
+	const sph_u32 *A_init, *B_init, *C_init;
+	sph_shabal_context *sc;
+
+	switch (size) {
+	case 192:
+		A_init = A_init_192;
+		B_init = B_init_192;
+		C_init = C_init_192;
+		break;
+	case 224:
+		A_init = A_init_224;
+		B_init = B_init_224;
+		C_init = C_init_224;
+		break;
+	case 256:
+		A_init = A_init_256;
+		B_init = B_init_256;
+		C_init = C_init_256;
+		break;
+	case 384:
+		A_init = A_init_384;
+		B_init = B_init_384;
+		C_init = C_init_384;
+		break;
+	case 512:
+		A_init = A_init_512;
+		B_init = B_init_512;
+		C_init = C_init_512;
+		break;
+	default:
+		return;	/* unsupported length: nothing is written to cc */
+	}
+	sc = cc;
+	memcpy(sc->A, A_init, sizeof sc->A);
+	memcpy(sc->B, B_init, sizeof sc->B);
+	memcpy(sc->C, C_init, sizeof sc->C);
+	sc->Wlow = 1;	/* the Wlow/Whigh counter starts at 1 */
+	sc->Whigh = 0;
+	sc->ptr = 0;	/* no input bytes buffered yet */
+}
+
+static void
+shabal_core(void *cc, const unsigned char *data, size_t len)
+{
+	sph_shabal_context *sc;
+	unsigned char *buf;
+	size_t ptr;
+	DECL_STATE
+
+	sc = cc;
+	buf = sc->buf;
+	ptr = sc->ptr;
+
+	/*
+	 * Fast path: the input does not complete the current block, so
+	 * it is only buffered and the state is not copied to local
+	 * variables. memcpy() is skipped for len == 0, because data may
+	 * then be a null pointer, which memcpy() must never receive.
+	 */
+	if (len < (sizeof sc->buf) - ptr) {
+		if (len > 0)
+			memcpy(buf + ptr, data, len);
+		sc->ptr = ptr + len;
+		return;
+	}
+
+	READ_STATE(sc);
+	while (len > 0) {
+		size_t clen;
+
+		clen = (sizeof sc->buf) - ptr;	/* room left in the block */
+		if (clen > len)
+			clen = len;
+		memcpy(buf + ptr, data, clen);
+		ptr += clen;
+		data += clen;
+		len -= clen;
+		if (ptr == sizeof sc->buf) {	/* block complete: absorb it */
+			DECODE_BLOCK;
+			INPUT_BLOCK_ADD;
+			XOR_W;
+			APPLY_P;
+			INPUT_BLOCK_SUB;
+			SWAP_BC;
+			INCR_W;
+			ptr = 0;
+		}
+	}
+	WRITE_STATE(sc);
+	sc->ptr = ptr;
+}
+
+static void
+shabal_close(void *cc, unsigned ub, unsigned n, void *dst, unsigned size_words)
+{
+	sph_shabal_context *sc;
+	unsigned char *buf;
+	size_t ptr;
+	int i;
+	unsigned z;
+	union {
+		unsigned char tmp_out[64];
+		sph_u32 dummy;	/* presumably forces 32-bit alignment of tmp_out */
+	} u;
+	size_t out_len;
+	DECL_STATE
+
+	sc = cc;
+	buf = sc->buf;
+	ptr = sc->ptr;
+	z = 0x80 >> n;	/* bit just below the n extra bits of ub */
+	buf[ptr] = ((ub & -z) | z) & 0xFF;	/* keep the n top bits, set bit z */
+	memset(buf + ptr + 1, 0, (sizeof sc->buf) - (ptr + 1));	/* zero the rest */
+	READ_STATE(sc);
+	DECODE_BLOCK;
+	INPUT_BLOCK_ADD;
+	XOR_W;
+	APPLY_P;
+	for (i = 0; i < 3; i ++) {	/* three more rounds; W is not incremented */
+		SWAP_BC;
+		XOR_W;
+		APPLY_P;
+	}
+
+	/*
+	 * We just use our local variables; no need to go through
+	 * the state structure. In order to share some code, we
+	 * emit the relevant words into a temporary buffer, which
+	 * we finally copy into the destination array.
+	 */
+	switch (size_words) {
+	case 16:	/* 512-bit output: B0..BF */
+		sph_enc32le_aligned(u.tmp_out +  0, B0);
+		sph_enc32le_aligned(u.tmp_out +  4, B1);
+		sph_enc32le_aligned(u.tmp_out +  8, B2);
+		sph_enc32le_aligned(u.tmp_out + 12, B3);
+		/* fall through */
+	case 12:	/* 384-bit output: B4..BF */
+		sph_enc32le_aligned(u.tmp_out + 16, B4);
+		sph_enc32le_aligned(u.tmp_out + 20, B5);
+		sph_enc32le_aligned(u.tmp_out + 24, B6);
+		sph_enc32le_aligned(u.tmp_out + 28, B7);
+		/* fall through */
+	case 8:	/* 256-bit output: B8..BF */
+		sph_enc32le_aligned(u.tmp_out + 32, B8);
+		/* fall through */
+	case 7:	/* 224-bit output: B9..BF */
+		sph_enc32le_aligned(u.tmp_out + 36, B9);
+		/* fall through */
+	case 6:	/* 192-bit output: BA..BF */
+		sph_enc32le_aligned(u.tmp_out + 40, BA);
+		sph_enc32le_aligned(u.tmp_out + 44, BB);
+		sph_enc32le_aligned(u.tmp_out + 48, BC);
+		sph_enc32le_aligned(u.tmp_out + 52, BD);
+		sph_enc32le_aligned(u.tmp_out + 56, BE);
+		sph_enc32le_aligned(u.tmp_out + 60, BF);
+		break;
+	default:
+		return;	/* unsupported size: dst not written, context not reinitialized */
+	}
+	out_len = size_words << 2;	/* output length in bytes */
+	memcpy(dst, u.tmp_out + (sizeof u.tmp_out) - out_len, out_len);	/* tail of tmp_out */
+	shabal_init(sc, size_words << 5);	/* size in bits; readies sc for reuse */
+}
+
+/* see sph_shabal.h */
+void
+sph_shabal192_init(void *cc)
+{
+	shabal_init(cc, 192);
+}
+
+/* see sph_shabal.h */
+void
+sph_shabal192(void *cc, const void *data, size_t len)
+{
+	shabal_core(cc, data, len);
+}
+
+/* see sph_shabal.h */
+void
+sph_shabal192_close(void *cc, void *dst)
+{
+	shabal_close(cc, 0, 0, dst, 6);
+}
+
+/* see sph_shabal.h */
+void
+sph_shabal192_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+	shabal_close(cc, ub, n, dst, 6);
+}
+
+/* see sph_shabal.h */
+void
+sph_shabal224_init(void *cc)
+{
+	shabal_init(cc, 224);
+}
+
+/* see sph_shabal.h */
+void
+sph_shabal224(void *cc, const void *data, size_t len)
+{
+	shabal_core(cc, data, len);
+}
+
+/* see sph_shabal.h */
+void
+sph_shabal224_close(void *cc, void *dst)
+{
+	shabal_close(cc, 0, 0, dst, 7);
+}
+
+/* see sph_shabal.h */
+void
+sph_shabal224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+	shabal_close(cc, ub, n, dst, 7);
+}
+
+/* see sph_shabal.h */
+void
+sph_shabal256_init(void *cc)
+{
+	shabal_init(cc, 256);
+}
+
+/* see sph_shabal.h */
+void
+sph_shabal256(void *cc, const void *data, size_t len)
+{
+	shabal_core(cc, data, len);
+}
+
+/* see sph_shabal.h */
+void
+sph_shabal256_close(void *cc, void *dst)
+{
+	shabal_close(cc, 0, 0, dst, 8);
+}
+
+/* see sph_shabal.h */
+void
+sph_shabal256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+	shabal_close(cc, ub, n, dst, 8);
+}
+
+/* see sph_shabal.h */
+void
+sph_shabal384_init(void *cc)
+{
+	shabal_init(cc, 384);
+}
+
+/* see sph_shabal.h */
+void
+sph_shabal384(void *cc, const void *data, size_t len)
+{
+	shabal_core(cc, data, len);
+}
+
+/* see sph_shabal.h */
+void
+sph_shabal384_close(void *cc, void *dst)
+{
+	shabal_close(cc, 0, 0, dst, 12);
+}
+
+/* see sph_shabal.h */
+void
+sph_shabal384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+	shabal_close(cc, ub, n, dst, 12);
+}
+
+/* see sph_shabal.h */
+void
+sph_shabal512_init(void *cc)
+{
+	shabal_init(cc, 512);
+}
+
+/* see sph_shabal.h */
+void
+sph_shabal512(void *cc, const void *data, size_t len)
+{
+	shabal_core(cc, data, len);
+}
+
+/* see sph_shabal.h */
+void
+sph_shabal512_close(void *cc, void *dst)
+{
+	shabal_close(cc, 0, 0, dst, 16);
+}
+
+/* see sph_shabal.h */
+void
+sph_shabal512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+	shabal_close(cc, ub, n, dst, 16);
+}
diff --git a/sph/sph_haval.h b/sph/sph_haval.h
new file mode 100644
index 0000000000..409daaf417
--- /dev/null
+++ b/sph/sph_haval.h
@@ -0,0 +1,976 @@
+/* $Id: sph_haval.h 218 2010-06-08 17:06:34Z tp $ */
+/**
+ * HAVAL interface.
+ *
+ * HAVAL is actually a family of 15 hash functions, depending on whether
+ * the internal computation uses 3, 4 or 5 passes, and on the output
+ * length, which is 128, 160, 192, 224 or 256 bits. This implementation
+ * provides interface functions for all 15, which internally map to
+ * three cores (depending on the number of passes). Note that output
+ * lengths other than 256 bits are not obtained by a simple truncation
+ * of a longer result; the requested length is encoded within the
+ * padding data.
+ *
+ * HAVAL was published in: Yuliang Zheng, Josef Pieprzyk and Jennifer
+ * Seberry: "HAVAL -- a one-way hashing algorithm with variable length
+ * of output", Advances in Cryptology -- AUSCRYPT'92, Lecture Notes in
+ * Computer Science, Vol.718, pp.83-104, Springer-Verlag, 1993.
+ *
+ * This paper, and a reference implementation, are available on the
+ * Calyptix web site: http://labs.calyptix.com/haval.php
+ *
+ * The HAVAL reference paper is quite unclear on the data encoding
+ * details, i.e. endianness (both byte order within a 32-bit word, and
+ * word order within a message block). This implementation has been
+ * made compatible with the reference implementation referenced above.
+ *
+ * @warning   A collision for HAVAL-128/3 (HAVAL with three passes and
+ * 128-bit output) has been published; this function is thus considered
+ * as cryptographically broken. The status for other variants is unclear;
+ * use only with care.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @file     sph_haval.h
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#ifndef SPH_HAVAL_H__
+#define SPH_HAVAL_H__
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+#include <stddef.h>
+#include "sph_types.h"
+
+/**
+ * Output size (in bits) for HAVAL-128/3.
+ */
+#define SPH_SIZE_haval128_3   128
+
+/**
+ * Output size (in bits) for HAVAL-128/4.
+ */
+#define SPH_SIZE_haval128_4   128
+
+/**
+ * Output size (in bits) for HAVAL-128/5.
+ */
+#define SPH_SIZE_haval128_5   128
+
+/**
+ * Output size (in bits) for HAVAL-160/3.
+ */
+#define SPH_SIZE_haval160_3   160
+
+/**
+ * Output size (in bits) for HAVAL-160/4.
+ */
+#define SPH_SIZE_haval160_4   160
+
+/**
+ * Output size (in bits) for HAVAL-160/5.
+ */
+#define SPH_SIZE_haval160_5   160
+
+/**
+ * Output size (in bits) for HAVAL-192/3.
+ */
+#define SPH_SIZE_haval192_3   192
+
+/**
+ * Output size (in bits) for HAVAL-192/4.
+ */
+#define SPH_SIZE_haval192_4   192
+
+/**
+ * Output size (in bits) for HAVAL-192/5.
+ */
+#define SPH_SIZE_haval192_5   192
+
+/**
+ * Output size (in bits) for HAVAL-224/3.
+ */
+#define SPH_SIZE_haval224_3   224
+
+/**
+ * Output size (in bits) for HAVAL-224/4.
+ */
+#define SPH_SIZE_haval224_4   224
+
+/**
+ * Output size (in bits) for HAVAL-224/5.
+ */
+#define SPH_SIZE_haval224_5   224
+
+/**
+ * Output size (in bits) for HAVAL-256/3.
+ */
+#define SPH_SIZE_haval256_3   256
+
+/**
+ * Output size (in bits) for HAVAL-256/4.
+ */
+#define SPH_SIZE_haval256_4   256
+
+/**
+ * Output size (in bits) for HAVAL-256/5.
+ */
+#define SPH_SIZE_haval256_5   256
+
+/**
+ * This structure is a context for HAVAL computations: it contains the
+ * intermediate values and some data from the last entered block. Once
+ * a HAVAL computation has been performed, the context can be reused for
+ * another computation.
+ *
+ * The contents of this structure are private. A running HAVAL computation
+ * can be cloned by copying the context (e.g. with a simple
+ * <code>memcpy()</code>).
+ */
+typedef struct {
+#ifndef DOXYGEN_IGNORE
+	unsigned char buf[128];    /* first field, for alignment */
+	sph_u32 s0, s1, s2, s3, s4, s5, s6, s7;
+	unsigned olen, passes;
+#if SPH_64
+	sph_u64 count;
+#else
+	sph_u32 count_high, count_low;
+#endif
+#endif
+} sph_haval_context;
+
+/**
+ * Type for a HAVAL-128/3 context (identical to the common context).
+ */
+typedef sph_haval_context sph_haval128_3_context;
+
+/**
+ * Type for a HAVAL-128/4 context (identical to the common context).
+ */
+typedef sph_haval_context sph_haval128_4_context;
+
+/**
+ * Type for a HAVAL-128/5 context (identical to the common context).
+ */
+typedef sph_haval_context sph_haval128_5_context;
+
+/**
+ * Type for a HAVAL-160/3 context (identical to the common context).
+ */
+typedef sph_haval_context sph_haval160_3_context;
+
+/**
+ * Type for a HAVAL-160/4 context (identical to the common context).
+ */
+typedef sph_haval_context sph_haval160_4_context;
+
+/**
+ * Type for a HAVAL-160/5 context (identical to the common context).
+ */
+typedef sph_haval_context sph_haval160_5_context;
+
+/**
+ * Type for a HAVAL-192/3 context (identical to the common context).
+ */
+typedef sph_haval_context sph_haval192_3_context;
+
+/**
+ * Type for a HAVAL-192/4 context (identical to the common context).
+ */
+typedef sph_haval_context sph_haval192_4_context;
+
+/**
+ * Type for a HAVAL-192/5 context (identical to the common context).
+ */
+typedef sph_haval_context sph_haval192_5_context;
+
+/**
+ * Type for a HAVAL-224/3 context (identical to the common context).
+ */
+typedef sph_haval_context sph_haval224_3_context;
+
+/**
+ * Type for a HAVAL-224/4 context (identical to the common context).
+ */
+typedef sph_haval_context sph_haval224_4_context;
+
+/**
+ * Type for a HAVAL-224/5 context (identical to the common context).
+ */
+typedef sph_haval_context sph_haval224_5_context;
+
+/**
+ * Type for a HAVAL-256/3 context (identical to the common context).
+ */
+typedef sph_haval_context sph_haval256_3_context;
+
+/**
+ * Type for a HAVAL-256/4 context (identical to the common context).
+ */
+typedef sph_haval_context sph_haval256_4_context;
+
+/**
+ * Type for a HAVAL-256/5 context (identical to the common context).
+ */
+typedef sph_haval_context sph_haval256_5_context;
+
+/**
+ * Initialize the context for HAVAL-128/3.
+ *
+ * @param cc   context to initialize (pointer to a
+ *             <code>sph_haval128_3_context</code> structure)
+ */
+void sph_haval128_3_init(void *cc);
+
+/**
+ * Process some data bytes for HAVAL-128/3. If <code>len</code> is 0,
+ * then this function does nothing.
+ *
+ * @param cc     the HAVAL-128/3 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_haval128_3(void *cc, const void *data, size_t len);
+
+/**
+ * Close a HAVAL-128/3 computation. The output buffer must be wide
+ * enough to accommodate the result (16 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the HAVAL-128/3 context
+ * @param dst   the output buffer
+ */
+void sph_haval128_3_close(void *cc, void *dst);
+
+/**
+ * Close a HAVAL-128/3 computation. Up to 7 extra input bits may be added
+ * to the input message; these are the <code>n</code> upper bits of
+ * the <code>ub</code> byte (i.e. the first extra bit has value 128 in
+ * <code>ub</code>, the second extra bit has value 64, and so on). Other
+ * bits in <code>ub</code> are ignored.
+ *
+ * The output buffer must be wide enough to accommodate the result (16
+ * bytes). The context is automatically reinitialized.
+ *
+ * @param cc    the HAVAL-128/3 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the output buffer
+ */
+void sph_haval128_3_addbits_and_close(void *cc,
+	unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize the context for HAVAL-128/4.
+ *
+ * @param cc   context to initialize (pointer to a
+ *             <code>sph_haval128_4_context</code> structure)
+ */
+void sph_haval128_4_init(void *cc);
+
+/**
+ * Process some data bytes for HAVAL-128/4. If <code>len</code> is 0,
+ * then this function does nothing.
+ *
+ * @param cc     the HAVAL-128/4 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_haval128_4(void *cc, const void *data, size_t len);
+
+/**
+ * Close a HAVAL-128/4 computation. The output buffer must be wide
+ * enough to accommodate the result (16 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the HAVAL-128/4 context
+ * @param dst   the output buffer
+ */
+void sph_haval128_4_close(void *cc, void *dst);
+
+/**
+ * Close a HAVAL-128/4 computation. Up to 7 extra input bits may be added
+ * to the input message; these are the <code>n</code> upper bits of
+ * the <code>ub</code> byte (i.e. the first extra bit has value 128 in
+ * <code>ub</code>, the second extra bit has value 64, and so on). Other
+ * bits in <code>ub</code> are ignored.
+ *
+ * The output buffer must be wide enough to accommodate the result (16
+ * bytes). The context is automatically reinitialized.
+ *
+ * @param cc    the HAVAL-128/4 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the output buffer
+ */
+void sph_haval128_4_addbits_and_close(void *cc,
+	unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize the context for HAVAL-128/5.
+ *
+ * @param cc   context to initialize (pointer to a
+ *             <code>sph_haval128_5_context</code> structure)
+ */
+void sph_haval128_5_init(void *cc);
+
+/**
+ * Process some data bytes for HAVAL-128/5. If <code>len</code> is 0,
+ * then this function does nothing.
+ *
+ * @param cc     the HAVAL-128/5 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_haval128_5(void *cc, const void *data, size_t len);
+
+/**
+ * Close a HAVAL-128/5 computation. The output buffer must be wide
+ * enough to accommodate the result (16 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the HAVAL-128/5 context
+ * @param dst   the output buffer
+ */
+void sph_haval128_5_close(void *cc, void *dst);
+
+/**
+ * Close a HAVAL-128/5 computation. Up to 7 extra input bits may be added
+ * to the input message; these are the <code>n</code> upper bits of
+ * the <code>ub</code> byte (i.e. the first extra bit has value 128 in
+ * <code>ub</code>, the second extra bit has value 64, and so on). Other
+ * bits in <code>ub</code> are ignored.
+ *
+ * The output buffer must be wide enough to accommodate the result (16
+ * bytes). The context is automatically reinitialized.
+ *
+ * @param cc    the HAVAL-128/5 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the output buffer
+ */
+void sph_haval128_5_addbits_and_close(void *cc,
+	unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize the context for HAVAL-160/3.
+ *
+ * @param cc   context to initialize (pointer to a
+ *             <code>sph_haval160_3_context</code> structure)
+ */
+void sph_haval160_3_init(void *cc);
+
+/**
+ * Process some data bytes for HAVAL-160/3. If <code>len</code> is 0,
+ * then this function does nothing.
+ *
+ * @param cc     the HAVAL-160/3 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_haval160_3(void *cc, const void *data, size_t len);
+
+/**
+ * Close a HAVAL-160/3 computation. The output buffer must be wide
+ * enough to accommodate the result (20 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the HAVAL-160/3 context
+ * @param dst   the output buffer
+ */
+void sph_haval160_3_close(void *cc, void *dst);
+
+/**
+ * Close a HAVAL-160/3 computation. Up to 7 extra input bits may be added
+ * to the input message; these are the <code>n</code> upper bits of
+ * the <code>ub</code> byte (i.e. the first extra bit has value 128 in
+ * <code>ub</code>, the second extra bit has value 64, and so on). Other
+ * bits in <code>ub</code> are ignored.
+ *
+ * The output buffer must be wide enough to accommodate the result (20
+ * bytes). The context is automatically reinitialized.
+ *
+ * @param cc    the HAVAL-160/3 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the output buffer
+ */
+void sph_haval160_3_addbits_and_close(void *cc,
+	unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize the context for HAVAL-160/4.
+ *
+ * @param cc   context to initialize (pointer to a
+ *             <code>sph_haval160_4_context</code> structure)
+ */
+void sph_haval160_4_init(void *cc);
+
+/**
+ * Process some data bytes for HAVAL-160/4. If <code>len</code> is 0,
+ * then this function does nothing.
+ *
+ * @param cc     the HAVAL-160/4 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_haval160_4(void *cc, const void *data, size_t len);
+
+/**
+ * Close a HAVAL-160/4 computation. The output buffer must be wide
+ * enough to accommodate the result (20 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the HAVAL-160/4 context
+ * @param dst   the output buffer
+ */
+void sph_haval160_4_close(void *cc, void *dst);
+
+/**
+ * Close a HAVAL-160/4 computation. Up to 7 extra input bits may be added
+ * to the input message; these are the <code>n</code> upper bits of
+ * the <code>ub</code> byte (i.e. the first extra bit has value 128 in
+ * <code>ub</code>, the second extra bit has value 64, and so on). Other
+ * bits in <code>ub</code> are ignored.
+ *
+ * The output buffer must be wide enough to accommodate the result (20
+ * bytes). The context is automatically reinitialized.
+ *
+ * @param cc    the HAVAL-160/4 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the output buffer
+ */
+void sph_haval160_4_addbits_and_close(void *cc,
+	unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize the context for HAVAL-160/5.
+ *
+ * @param cc   context to initialize (pointer to a
+ *             <code>sph_haval160_5_context</code> structure)
+ */
+void sph_haval160_5_init(void *cc);
+
+/**
+ * Process some data bytes for HAVAL-160/5. If <code>len</code> is 0,
+ * then this function does nothing.
+ *
+ * @param cc     the HAVAL-160/5 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_haval160_5(void *cc, const void *data, size_t len);
+
+/**
+ * Close a HAVAL-160/5 computation. The output buffer must be wide
+ * enough to accommodate the result (20 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the HAVAL-160/5 context
+ * @param dst   the output buffer
+ */
+void sph_haval160_5_close(void *cc, void *dst);
+
+/**
+ * Close a HAVAL-160/5 computation. Up to 7 extra input bits may be added
+ * to the input message; these are the <code>n</code> upper bits of
+ * the <code>ub</code> byte (i.e. the first extra bit has value 128 in
+ * <code>ub</code>, the second extra bit has value 64, and so on). Other
+ * bits in <code>ub</code> are ignored.
+ *
+ * The output buffer must be wide enough to accommodate the result (20
+ * bytes). The context is automatically reinitialized.
+ *
+ * @param cc    the HAVAL-160/5 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the output buffer
+ */
+void sph_haval160_5_addbits_and_close(void *cc,
+	unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize the context for HAVAL-192/3.
+ *
+ * @param cc   context to initialize (pointer to a
+ *             <code>sph_haval192_3_context</code> structure)
+ */
+void sph_haval192_3_init(void *cc);
+
+/**
+ * Process some data bytes for HAVAL-192/3. If <code>len</code> is 0,
+ * then this function does nothing.
+ *
+ * @param cc     the HAVAL-192/3 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_haval192_3(void *cc, const void *data, size_t len);
+
+/**
+ * Close a HAVAL-192/3 computation. The output buffer must be wide
+ * enough to accommodate the result (24 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the HAVAL-192/3 context
+ * @param dst   the output buffer
+ */
+void sph_haval192_3_close(void *cc, void *dst);
+
+/**
+ * Close a HAVAL-192/3 computation. Up to 7 extra input bits may be added
+ * to the input message; these are the <code>n</code> upper bits of
+ * the <code>ub</code> byte (i.e. the first extra bit has value 128 in
+ * <code>ub</code>, the second extra bit has value 64, and so on). Other
+ * bits in <code>ub</code> are ignored.
+ *
+ * The output buffer must be wide enough to accommodate the result (24
+ * bytes). The context is automatically reinitialized.
+ *
+ * @param cc    the HAVAL-192/3 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the output buffer
+ */
+void sph_haval192_3_addbits_and_close(void *cc,
+	unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize the context for HAVAL-192/4.
+ *
+ * @param cc   context to initialize (pointer to a
+ *             <code>sph_haval192_4_context</code> structure)
+ */
+void sph_haval192_4_init(void *cc);
+
+/**
+ * Process some data bytes for HAVAL-192/4. If <code>len</code> is 0,
+ * then this function does nothing.
+ *
+ * @param cc     the HAVAL-192/4 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_haval192_4(void *cc, const void *data, size_t len);
+
+/**
+ * Close a HAVAL-192/4 computation. The output buffer must be wide
+ * enough to accommodate the result (24 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the HAVAL-192/4 context
+ * @param dst   the output buffer
+ */
+void sph_haval192_4_close(void *cc, void *dst);
+
+/**
+ * Close a HAVAL-192/4 computation. Up to 7 extra input bits may be added
+ * to the input message; these are the <code>n</code> upper bits of
+ * the <code>ub</code> byte (i.e. the first extra bit has value 128 in
+ * <code>ub</code>, the second extra bit has value 64, and so on). Other
+ * bits in <code>ub</code> are ignored.
+ *
+ * The output buffer must be wide enough to accommodate the result (24
+ * bytes). The context is automatically reinitialized.
+ *
+ * @param cc    the HAVAL-192/4 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the output buffer
+ */
+void sph_haval192_4_addbits_and_close(void *cc,
+	unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize the context for HAVAL-192/5.
+ *
+ * @param cc   context to initialize (pointer to a
+ *             <code>sph_haval192_5_context</code> structure)
+ */
+void sph_haval192_5_init(void *cc);
+
+/**
+ * Process some data bytes for HAVAL-192/5. If <code>len</code> is 0,
+ * then this function does nothing.
+ *
+ * @param cc     the HAVAL-192/5 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_haval192_5(void *cc, const void *data, size_t len);
+
+/**
+ * Close a HAVAL-192/5 computation. The output buffer must be wide
+ * enough to accommodate the result (24 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the HAVAL-192/5 context
+ * @param dst   the output buffer
+ */
+void sph_haval192_5_close(void *cc, void *dst);
+
+/**
+ * Close a HAVAL-192/5 computation. Up to 7 extra input bits may be added
+ * to the input message; these are the <code>n</code> upper bits of
+ * the <code>ub</code> byte (i.e. the first extra bit has value 128 in
+ * <code>ub</code>, the second extra bit has value 64, and so on). Other
+ * bits in <code>ub</code> are ignored.
+ *
+ * The output buffer must be wide enough to accommodate the result (24
+ * bytes). The context is automatically reinitialized.
+ *
+ * @param cc    the HAVAL-192/5 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the output buffer
+ */
+void sph_haval192_5_addbits_and_close(void *cc,
+	unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize the context for HAVAL-224/3.
+ *
+ * @param cc   context to initialize (pointer to a
+ *             <code>sph_haval224_3_context</code> structure)
+ */
+void sph_haval224_3_init(void *cc);
+
+/**
+ * Process some data bytes for HAVAL-224/3. If <code>len</code> is 0,
+ * then this function does nothing.
+ *
+ * @param cc     the HAVAL-224/3 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_haval224_3(void *cc, const void *data, size_t len);
+
+/**
+ * Close a HAVAL-224/3 computation. The output buffer must be wide
+ * enough to accommodate the result (28 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the HAVAL-224/3 context
+ * @param dst   the output buffer
+ */
+void sph_haval224_3_close(void *cc, void *dst);
+
+/**
+ * Close a HAVAL-224/3 computation. Up to 7 extra input bits may be added
+ * to the input message; these are the <code>n</code> upper bits of
+ * the <code>ub</code> byte (i.e. the first extra bit has value 128 in
+ * <code>ub</code>, the second extra bit has value 64, and so on). Other
+ * bits in <code>ub</code> are ignored.
+ *
+ * The output buffer must be wide enough to accommodate the result (28
+ * bytes). The context is automatically reinitialized.
+ *
+ * @param cc    the HAVAL-224/3 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the output buffer
+ */
+void sph_haval224_3_addbits_and_close(void *cc,
+	unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize the context for HAVAL-224/4.
+ *
+ * @param cc   context to initialize (pointer to a
+ *             <code>sph_haval224_4_context</code> structure)
+ */
+void sph_haval224_4_init(void *cc);
+
+/**
+ * Process some data bytes for HAVAL-224/4. If <code>len</code> is 0,
+ * then this function does nothing.
+ *
+ * @param cc     the HAVAL-224/4 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_haval224_4(void *cc, const void *data, size_t len);
+
+/**
+ * Close a HAVAL-224/4 computation. The output buffer must be wide
+ * enough to accommodate the result (28 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the HAVAL-224/4 context
+ * @param dst   the output buffer
+ */
+void sph_haval224_4_close(void *cc, void *dst);
+
+/**
+ * Close a HAVAL-224/4 computation. Up to 7 extra input bits may be added
+ * to the input message; these are the <code>n</code> upper bits of
+ * the <code>ub</code> byte (i.e. the first extra bit has value 128 in
+ * <code>ub</code>, the second extra bit has value 64, and so on). Other
+ * bits in <code>ub</code> are ignored.
+ *
+ * The output buffer must be wide enough to accommodate the result (28
+ * bytes). The context is automatically reinitialized.
+ *
+ * @param cc    the HAVAL-224/4 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the output buffer
+ */
+void sph_haval224_4_addbits_and_close(void *cc,
+	unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize the context for HAVAL-224/5.
+ *
+ * @param cc   context to initialize (pointer to a
+ *             <code>sph_haval224_5_context</code> structure)
+ */
+void sph_haval224_5_init(void *cc);
+
+/**
+ * Process some data bytes for HAVAL-224/5. If <code>len</code> is 0,
+ * then this function does nothing.
+ *
+ * @param cc     the HAVAL-224/5 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_haval224_5(void *cc, const void *data, size_t len);
+
+/**
+ * Close a HAVAL-224/5 computation. The output buffer must be wide
+ * enough to accommodate the result (28 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the HAVAL-224/5 context
+ * @param dst   the output buffer
+ */
+void sph_haval224_5_close(void *cc, void *dst);
+
+/**
+ * Close a HAVAL-224/5 computation. Up to 7 extra input bits may be added
+ * to the input message; these are the <code>n</code> upper bits of
+ * the <code>ub</code> byte (i.e. the first extra bit has value 128 in
+ * <code>ub</code>, the second extra bit has value 64, and so on). Other
+ * bits in <code>ub</code> are ignored.
+ *
+ * The output buffer must be wide enough to accommodate the result (28
+ * bytes). The context is automatically reinitialized.
+ *
+ * @param cc    the HAVAL-224/5 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the output buffer
+ */
+void sph_haval224_5_addbits_and_close(void *cc,
+	unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize the context for HAVAL-256/3.
+ *
+ * @param cc   context to initialize (pointer to a
+ *             <code>sph_haval256_3_context</code> structure)
+ */
+void sph_haval256_3_init(void *cc);
+
+/**
+ * Process some data bytes for HAVAL-256/3. If <code>len</code> is 0,
+ * then this function does nothing.
+ *
+ * @param cc     the HAVAL-256/3 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_haval256_3(void *cc, const void *data, size_t len);
+
+/**
+ * Close a HAVAL-256/3 computation. The output buffer must be wide
+ * enough to accommodate the result (32 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the HAVAL-256/3 context
+ * @param dst   the output buffer
+ */
+void sph_haval256_3_close(void *cc, void *dst);
+
+/**
+ * Close a HAVAL-256/3 computation. Up to 7 extra input bits may be added
+ * to the input message; these are the <code>n</code> upper bits of
+ * the <code>ub</code> byte (i.e. the first extra bit has value 128 in
+ * <code>ub</code>, the second extra bit has value 64, and so on). Other
+ * bits in <code>ub</code> are ignored.
+ *
+ * The output buffer must be wide enough to accommodate the result (32
+ * bytes). The context is automatically reinitialized.
+ *
+ * @param cc    the HAVAL-256/3 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the output buffer
+ */
+void sph_haval256_3_addbits_and_close(void *cc,
+	unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize the context for HAVAL-256/4.
+ *
+ * @param cc   context to initialize (pointer to a
+ *             <code>sph_haval256_4_context</code> structure)
+ */
+void sph_haval256_4_init(void *cc);
+
+/**
+ * Process some data bytes for HAVAL-256/4. If <code>len</code> is 0,
+ * then this function does nothing.
+ *
+ * @param cc     the HAVAL-256/4 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_haval256_4(void *cc, const void *data, size_t len);
+
+/**
+ * Close a HAVAL-256/4 computation. The output buffer must be wide
+ * enough to accommodate the result (32 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the HAVAL-256/4 context
+ * @param dst   the output buffer
+ */
+void sph_haval256_4_close(void *cc, void *dst);
+
+/**
+ * Close a HAVAL-256/4 computation. Up to 7 extra input bits may be added
+ * to the input message; these are the <code>n</code> upper bits of
+ * the <code>ub</code> byte (i.e. the first extra bit has value 128 in
+ * <code>ub</code>, the second extra bit has value 64, and so on). Other
+ * bits in <code>ub</code> are ignored.
+ *
+ * The output buffer must be wide enough to accommodate the result (32
+ * bytes). The context is automatically reinitialized.
+ *
+ * @param cc    the HAVAL-256/4 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the output buffer
+ */
+void sph_haval256_4_addbits_and_close(void *cc,
+	unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize the context for HAVAL-256/5.
+ *
+ * @param cc   context to initialize (pointer to a
+ *             <code>sph_haval256_5_context</code> structure)
+ */
+void sph_haval256_5_init(void *cc);
+
+/**
+ * Process some data bytes for HAVAL-256/5. If <code>len</code> is 0,
+ * then this function does nothing.
+ *
+ * @param cc     the HAVAL-256/5 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_haval256_5(void *cc, const void *data, size_t len);
+
+/**
+ * Close a HAVAL-256/5 computation. The output buffer must be wide
+ * enough to accommodate the result (32 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the HAVAL-256/5 context
+ * @param dst   the output buffer
+ */
+void sph_haval256_5_close(void *cc, void *dst);
+
+/**
+ * Close a HAVAL-256/5 computation. Up to 7 extra input bits may be added
+ * to the input message; these are the <code>n</code> upper bits of
+ * the <code>ub</code> byte (i.e. the first extra bit has value 128 in
+ * <code>ub</code>, the second extra bit has value 64, and so on). Other
+ * bits in <code>ub</code> are ignored.
+ *
+ * The output buffer must be wide enough to accommodate the result (32
+ * bytes). The context is automatically reinitialized.
+ *
+ * @param cc    the HAVAL-256/5 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the output buffer
+ */
+void sph_haval256_5_addbits_and_close(void *cc,
+	unsigned ub, unsigned n, void *dst);
+
+/**
+ * Apply the HAVAL compression function on the provided data. The
+ * <code>msg</code> parameter contains the 32 32-bit input blocks,
+ * as numerical values (hence after the little-endian decoding). The
+ * <code>val</code> parameter contains the 8 32-bit input blocks for
+ * the compression function; the output is written in place in this
+ * array. This function uses three internal passes.
+ *
+ * @param msg   the message block (32 values)
+ * @param val   the function 256-bit input and output
+ */
+void sph_haval_3_comp(const sph_u32 msg[32], sph_u32 val[8]);
+
+/**
+ * Apply the HAVAL compression function on the provided data. The
+ * <code>msg</code> parameter contains the 32 32-bit input blocks,
+ * as numerical values (hence after the little-endian decoding). The
+ * <code>val</code> parameter contains the 8 32-bit input blocks for
+ * the compression function; the output is written in place in this
+ * array. This function uses four internal passes.
+ *
+ * @param msg   the message block (32 values)
+ * @param val   the function 256-bit input and output
+ */
+void sph_haval_4_comp(const sph_u32 msg[32], sph_u32 val[8]);
+
+/**
+ * Apply the HAVAL compression function on the provided data. The
+ * <code>msg</code> parameter contains the 32 32-bit input blocks,
+ * as numerical values (hence after the little-endian decoding). The
+ * <code>val</code> parameter contains the 8 32-bit input blocks for
+ * the compression function; the output is written in place in this
+ * array. This function uses five internal passes.
+ *
+ * @param msg   the message block (32 values)
+ * @param val   the function 256-bit input and output
+ */
+void sph_haval_5_comp(const sph_u32 msg[32], sph_u32 val[8]);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/sph/sph_ripemd.h b/sph/sph_ripemd.h
new file mode 100644
index 0000000000..256776830f
--- /dev/null
+++ b/sph/sph_ripemd.h
@@ -0,0 +1,273 @@
+/* $Id: sph_ripemd.h 216 2010-06-08 09:46:57Z tp $ */
+/**
+ * RIPEMD, RIPEMD-128 and RIPEMD-160 interface.
+ *
+ * RIPEMD was first described in: Research and Development in Advanced
+ * Communication Technologies in Europe, "RIPE Integrity Primitives:
+ * Final Report of RACE Integrity Primitives Evaluation (R1040)", RACE,
+ * June 1992.
+ *
+ * A new, strengthened version, dubbed RIPEMD-160, was published in: H.
+ * Dobbertin, A. Bosselaers, and B. Preneel, "RIPEMD-160, a strengthened
+ * version of RIPEMD", Fast Software Encryption - FSE'96, LNCS 1039,
+ * Springer (1996), pp. 71--82.
+ *
+ * This article describes both RIPEMD-160, with a 160-bit output, and a
+ * reduced version called RIPEMD-128, which has a 128-bit output. RIPEMD-128
+ * was meant as a "drop-in" replacement for any hash function with 128-bit
+ * output, especially the original RIPEMD.
+ *
+ * @warning   Collisions, and an efficient method to build other collisions,
+ * have been published for the original RIPEMD, which is thus considered as
+ * cryptographically broken. It is also very rarely encountered, and there
+ * seems to exist no free description or implementation of RIPEMD (except
+ * the sphlib code, of course). As of January 2007, RIPEMD-128 and RIPEMD-160
+ * seem as secure as their output length allows.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @file     sph_ripemd.h
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#ifndef SPH_RIPEMD_H__
+#define SPH_RIPEMD_H__
+
+#include <stddef.h>
+#include "sph_types.h"
+
+/**
+ * Output size (in bits) for RIPEMD.
+ */
+#define SPH_SIZE_ripemd   128
+
+/**
+ * Output size (in bits) for RIPEMD-128.
+ */
+#define SPH_SIZE_ripemd128   128
+
+/**
+ * Output size (in bits) for RIPEMD-160.
+ */
+#define SPH_SIZE_ripemd160   160
+
+/**
+ * This structure is a context for RIPEMD computations: it contains the
+ * intermediate values and some data from the last entered block. Once
+ * a RIPEMD computation has been performed, the context can be reused for
+ * another computation.
+ *
+ * The contents of this structure are private. A running RIPEMD computation
+ * can be cloned by copying the context (e.g. with a simple
+ * <code>memcpy()</code>).
+ */
+typedef struct {
+#ifndef DOXYGEN_IGNORE
+	unsigned char buf[64];    /* first field, for alignment; presumably data from the last entered block */
+	sph_u32 val[4];           /* four state words (128 bits); presumably the intermediate values */
+#if SPH_64
+	sph_u64 count;            /* single 64-bit counter; presumably input length so far -- confirm in ripemd.c */
+#else
+	sph_u32 count_high, count_low;   /* presumably the same counter split into two 32-bit halves */
+#endif /* SPH_64 */
+#endif /* !DOXYGEN_IGNORE */
+} sph_ripemd_context;
+
+/**
+ * Initialize a RIPEMD context. This process performs no memory allocation.
+ *
+ * @param cc   the RIPEMD context (pointer to
+ *             a <code>sph_ripemd_context</code>)
+ */
+void sph_ripemd_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the RIPEMD context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_ripemd(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current RIPEMD computation and output the result into the
+ * provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (16 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the RIPEMD context
+ * @param dst   the destination buffer
+ */
+void sph_ripemd_close(void *cc, void *dst);
+
+/**
+ * Apply the RIPEMD compression function on the provided data. The
+ * <code>msg</code> parameter contains the 16 32-bit input blocks,
+ * as numerical values (hence after the little-endian decoding). The
+ * <code>val</code> parameter contains the 4 32-bit input blocks for
+ * the compression function; the output is written in place in this
+ * array.
+ *
+ * @param msg   the message block (16 values)
+ * @param val   the function 128-bit input and output
+ */
+void sph_ripemd_comp(const sph_u32 msg[16], sph_u32 val[4]);
+
+/* ===================================================================== */ 
+
+/**
+ * This structure is a context for RIPEMD-128 computations: it contains the
+ * intermediate values and some data from the last entered block. Once
+ * a RIPEMD-128 computation has been performed, the context can be reused for
+ * another computation.
+ *
+ * The contents of this structure are private. A running RIPEMD-128 computation
+ * can be cloned by copying the context (e.g. with a simple
+ * <code>memcpy()</code>).
+ */
+typedef struct {
+#ifndef DOXYGEN_IGNORE
+	unsigned char buf[64];    /* first field, for alignment; presumably data from the last entered block */
+	sph_u32 val[4];           /* four state words (128 bits); presumably the intermediate values */
+#if SPH_64
+	sph_u64 count;            /* single 64-bit counter; presumably input length so far -- confirm in ripemd.c */
+#else
+	sph_u32 count_high, count_low;   /* presumably the same counter split into two 32-bit halves */
+#endif /* SPH_64 */
+#endif /* !DOXYGEN_IGNORE */
+} sph_ripemd128_context;
+
+/**
+ * Initialize a RIPEMD-128 context. This process performs no memory allocation.
+ *
+ * @param cc   the RIPEMD-128 context (pointer to
+ *             a <code>sph_ripemd128_context</code>)
+ */
+void sph_ripemd128_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the RIPEMD-128 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_ripemd128(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current RIPEMD-128 computation and output the result into the
+ * provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (16 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the RIPEMD-128 context
+ * @param dst   the destination buffer
+ */
+void sph_ripemd128_close(void *cc, void *dst);
+
+/**
+ * Apply the RIPEMD-128 compression function on the provided data. The
+ * <code>msg</code> parameter contains the 16 32-bit input blocks,
+ * as numerical values (hence after the little-endian decoding). The
+ * <code>val</code> parameter contains the 4 32-bit input blocks for
+ * the compression function; the output is written in place in this
+ * array.
+ *
+ * @param msg   the message block (16 values)
+ * @param val   the function 128-bit input and output
+ */
+void sph_ripemd128_comp(const sph_u32 msg[16], sph_u32 val[4]);
+
+/* ===================================================================== */ 
+
+/**
+ * This structure is a context for RIPEMD-160 computations: it contains the
+ * intermediate values and some data from the last entered block. Once
+ * a RIPEMD-160 computation has been performed, the context can be reused for
+ * another computation.
+ *
+ * The contents of this structure are private. A running RIPEMD-160 computation
+ * can be cloned by copying the context (e.g. with a simple
+ * <code>memcpy()</code>).
+ */
+typedef struct {
+#ifndef DOXYGEN_IGNORE
+	unsigned char buf[64];    /* first field, for alignment; presumably data from the last entered block */
+	sph_u32 val[5];           /* five state words (160 bits); presumably the intermediate values */
+#if SPH_64
+	sph_u64 count;            /* single 64-bit counter; presumably input length so far -- confirm in ripemd.c */
+#else
+	sph_u32 count_high, count_low;   /* presumably the same counter split into two 32-bit halves */
+#endif /* SPH_64 */
+#endif /* !DOXYGEN_IGNORE */
+} sph_ripemd160_context;
+
+/**
+ * Initialize a RIPEMD-160 context. This process performs no memory allocation.
+ *
+ * @param cc   the RIPEMD-160 context (pointer to
+ *             a <code>sph_ripemd160_context</code>)
+ */
+void sph_ripemd160_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the RIPEMD-160 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_ripemd160(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current RIPEMD-160 computation and output the result into the
+ * provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (20 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the RIPEMD-160 context
+ * @param dst   the destination buffer
+ */
+void sph_ripemd160_close(void *cc, void *dst);
+
+/**
+ * Apply the RIPEMD-160 compression function on the provided data. The
+ * <code>msg</code> parameter contains the 16 32-bit input blocks,
+ * as numerical values (hence after the little-endian decoding). The
+ * <code>val</code> parameter contains the 5 32-bit input blocks for
+ * the compression function; the output is written in place in this
+ * array.
+ *
+ * @param msg   the message block (16 values)
+ * @param val   the function 160-bit input and output
+ */
+void sph_ripemd160_comp(const sph_u32 msg[16], sph_u32 val[5]);
+
+#endif
diff --git a/sph/sph_sha2.c b/sph/sph_sha2.c
new file mode 100644
index 0000000000..aab2c5518c
--- /dev/null
+++ b/sph/sph_sha2.c
@@ -0,0 +1,691 @@
+/* $Id: sha2.c 227 2010-06-16 17:28:38Z tp $ */
+/*
+ * SHA-224 / SHA-256 implementation.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#include <stddef.h>
+#include <string.h>
+
+#include "sph_sha2.h"
+
+#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_SHA2
+#define SPH_SMALL_FOOTPRINT_SHA2   1
+#endif
+
+#define CH(X, Y, Z)    ((((Y) ^ (Z)) & (X)) ^ (Z))   /* bitwise select: Y where X is 1, Z where X is 0 */
+#define MAJ(X, Y, Z)   (((Y) & (Z)) | (((Y) | (Z)) & (X)))   /* bitwise majority of X, Y and Z */
+
+#define ROTR    SPH_ROTR32   /* local shorthand; SPH_ROTR32 is not defined in this file */
+
+#define BSG2_0(x)      (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))   /* applied to the "a" variable in every step */
+#define BSG2_1(x)      (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))   /* applied to the "e" variable in every step */
+#define SSG2_0(x)      (ROTR(x, 7) ^ ROTR(x, 18) ^ SPH_T32((x) >> 3))   /* message expansion: word 15 steps back */
+#define SSG2_1(x)      (ROTR(x, 17) ^ ROTR(x, 19) ^ SPH_T32((x) >> 10))   /* message expansion: word 2 steps back */
+
+static const sph_u32 H224[8] = {   /* initial state copied into val[] by sph_sha224_init() */
+	SPH_C32(0xC1059ED8), SPH_C32(0x367CD507), SPH_C32(0x3070DD17),
+	SPH_C32(0xF70E5939), SPH_C32(0xFFC00B31), SPH_C32(0x68581511),
+	SPH_C32(0x64F98FA7), SPH_C32(0xBEFA4FA4)
+};
+
+static const sph_u32 H256[8] = {   /* initial state copied into val[] by sph_sha256_init() */
+	SPH_C32(0x6A09E667), SPH_C32(0xBB67AE85), SPH_C32(0x3C6EF372),
+	SPH_C32(0xA54FF53A), SPH_C32(0x510E527F), SPH_C32(0x9B05688C),
+	SPH_C32(0x1F83D9AB), SPH_C32(0x5BE0CD19)
+};
+
+/*
+ * The SHA2_ROUND_BODY defines the body for a SHA-224 / SHA-256
+ * compression function implementation. The "in" parameter should
+ * evaluate, when applied to a numerical input parameter from 0 to 15,
+ * to an expression which yields the corresponding input block. The "r"
+ * parameter should evaluate to an array or pointer expression
+ * designating the array of 8 words which contains the input and output
+ * of the compression function.
+ */
+
+#if SPH_SMALL_FOOTPRINT_SHA2
+
+static const sph_u32 K[64] = {   /* per-step additive constants, read as K[pcount + (pc)] by SHA2_STEPn */
+	SPH_C32(0x428A2F98), SPH_C32(0x71374491),
+	SPH_C32(0xB5C0FBCF), SPH_C32(0xE9B5DBA5),
+	SPH_C32(0x3956C25B), SPH_C32(0x59F111F1),
+	SPH_C32(0x923F82A4), SPH_C32(0xAB1C5ED5),
+	SPH_C32(0xD807AA98), SPH_C32(0x12835B01),
+	SPH_C32(0x243185BE), SPH_C32(0x550C7DC3),
+	SPH_C32(0x72BE5D74), SPH_C32(0x80DEB1FE),
+	SPH_C32(0x9BDC06A7), SPH_C32(0xC19BF174),
+	SPH_C32(0xE49B69C1), SPH_C32(0xEFBE4786),
+	SPH_C32(0x0FC19DC6), SPH_C32(0x240CA1CC),
+	SPH_C32(0x2DE92C6F), SPH_C32(0x4A7484AA),
+	SPH_C32(0x5CB0A9DC), SPH_C32(0x76F988DA),
+	SPH_C32(0x983E5152), SPH_C32(0xA831C66D),
+	SPH_C32(0xB00327C8), SPH_C32(0xBF597FC7),
+	SPH_C32(0xC6E00BF3), SPH_C32(0xD5A79147),
+	SPH_C32(0x06CA6351), SPH_C32(0x14292967),
+	SPH_C32(0x27B70A85), SPH_C32(0x2E1B2138),
+	SPH_C32(0x4D2C6DFC), SPH_C32(0x53380D13),
+	SPH_C32(0x650A7354), SPH_C32(0x766A0ABB),
+	SPH_C32(0x81C2C92E), SPH_C32(0x92722C85),
+	SPH_C32(0xA2BFE8A1), SPH_C32(0xA81A664B),
+	SPH_C32(0xC24B8B70), SPH_C32(0xC76C51A3),
+	SPH_C32(0xD192E819), SPH_C32(0xD6990624),
+	SPH_C32(0xF40E3585), SPH_C32(0x106AA070),
+	SPH_C32(0x19A4C116), SPH_C32(0x1E376C08),
+	SPH_C32(0x2748774C), SPH_C32(0x34B0BCB5),
+	SPH_C32(0x391C0CB3), SPH_C32(0x4ED8AA4A),
+	SPH_C32(0x5B9CCA4F), SPH_C32(0x682E6FF3),
+	SPH_C32(0x748F82EE), SPH_C32(0x78A5636F),
+	SPH_C32(0x84C87814), SPH_C32(0x8CC70208),
+	SPH_C32(0x90BEFFFA), SPH_C32(0xA4506CEB),
+	SPH_C32(0xBEF9A3F7), SPH_C32(0xC67178F2)
+};
+
+#define SHA2_MEXP1(in, pc)   do { /* first 16 steps: take word pc straight from the input accessor in() */ \
+		W[pc] = in(pc); \
+	} while (0)
+
+#define SHA2_MEXP2(in, pc)   do { /* later steps: recompute slot pc of the 16-word ring W[] in place; in is unused */ \
+		W[(pc) & 0x0F] = SPH_T32(SSG2_1(W[((pc) - 2) & 0x0F]) \
+			+ W[((pc) - 7) & 0x0F] \
+			+ SSG2_0(W[((pc) - 15) & 0x0F]) + W[(pc) & 0x0F]); /* last term: the word stored 16 steps earlier */ \
+	} while (0)
+
+#define SHA2_STEPn(n, a, b, c, d, e, f, g, h, in, pc)   do { /* one step; n (1 or 2) picks SHA2_MEXP1 or SHA2_MEXP2 */ \
+		sph_u32 t1, t2; \
+		SHA2_MEXP ## n(in, pc); /* token-pasted; fills W[(pc) & 0x0F] for this step */ \
+		t1 = SPH_T32(h + BSG2_1(e) + CH(e, f, g) \
+			+ K[pcount + (pc)] + W[(pc) & 0x0F]); /* pcount is a local of the enclosing SHA2_ROUND_BODY */ \
+		t2 = SPH_T32(BSG2_0(a) + MAJ(a, b, c)); \
+		d = SPH_T32(d + t1); /* only d and h are written; callers rotate the argument order between steps */ \
+		h = SPH_T32(t1 + t2); \
+	} while (0)
+
+#define SHA2_STEP1(a, b, c, d, e, f, g, h, in, pc) /* step whose message word comes directly from in() */ \
+	SHA2_STEPn(1, a, b, c, d, e, f, g, h, in, pc)
+#define SHA2_STEP2(a, b, c, d, e, f, g, h, in, pc) /* step whose message word is expanded from W[] */ \
+	SHA2_STEPn(2, a, b, c, d, e, f, g, h, in, pc)
+
+#define SHA2_ROUND_BODY(in, r)   do { /* compact variant: 16 input steps, then 3 loop passes of 16 expanded steps */ \
+		sph_u32 A, B, C, D, E, F, G, H; \
+		sph_u32 W[16]; /* 16-word ring of message-schedule words */ \
+		unsigned pcount; /* base index into K[] for the current group of 16 steps */ \
+ \
+		A = (r)[0]; \
+		B = (r)[1]; \
+		C = (r)[2]; \
+		D = (r)[3]; \
+		E = (r)[4]; \
+		F = (r)[5]; \
+		G = (r)[6]; \
+		H = (r)[7]; \
+		pcount = 0; \
+		SHA2_STEP1(A, B, C, D, E, F, G, H, in,  0); \
+		SHA2_STEP1(H, A, B, C, D, E, F, G, in,  1); \
+		SHA2_STEP1(G, H, A, B, C, D, E, F, in,  2); \
+		SHA2_STEP1(F, G, H, A, B, C, D, E, in,  3); \
+		SHA2_STEP1(E, F, G, H, A, B, C, D, in,  4); \
+		SHA2_STEP1(D, E, F, G, H, A, B, C, in,  5); \
+		SHA2_STEP1(C, D, E, F, G, H, A, B, in,  6); \
+		SHA2_STEP1(B, C, D, E, F, G, H, A, in,  7); \
+		SHA2_STEP1(A, B, C, D, E, F, G, H, in,  8); \
+		SHA2_STEP1(H, A, B, C, D, E, F, G, in,  9); \
+		SHA2_STEP1(G, H, A, B, C, D, E, F, in, 10); \
+		SHA2_STEP1(F, G, H, A, B, C, D, E, in, 11); \
+		SHA2_STEP1(E, F, G, H, A, B, C, D, in, 12); \
+		SHA2_STEP1(D, E, F, G, H, A, B, C, in, 13); \
+		SHA2_STEP1(C, D, E, F, G, H, A, B, in, 14); \
+		SHA2_STEP1(B, C, D, E, F, G, H, A, in, 15); \
+		for (pcount = 16; pcount < 64; pcount += 16) { /* pcount = 16, 32, 48 */ \
+			SHA2_STEP2(A, B, C, D, E, F, G, H, in,  0); \
+			SHA2_STEP2(H, A, B, C, D, E, F, G, in,  1); \
+			SHA2_STEP2(G, H, A, B, C, D, E, F, in,  2); \
+			SHA2_STEP2(F, G, H, A, B, C, D, E, in,  3); \
+			SHA2_STEP2(E, F, G, H, A, B, C, D, in,  4); \
+			SHA2_STEP2(D, E, F, G, H, A, B, C, in,  5); \
+			SHA2_STEP2(C, D, E, F, G, H, A, B, in,  6); \
+			SHA2_STEP2(B, C, D, E, F, G, H, A, in,  7); \
+			SHA2_STEP2(A, B, C, D, E, F, G, H, in,  8); \
+			SHA2_STEP2(H, A, B, C, D, E, F, G, in,  9); \
+			SHA2_STEP2(G, H, A, B, C, D, E, F, in, 10); \
+			SHA2_STEP2(F, G, H, A, B, C, D, E, in, 11); \
+			SHA2_STEP2(E, F, G, H, A, B, C, D, in, 12); \
+			SHA2_STEP2(D, E, F, G, H, A, B, C, in, 13); \
+			SHA2_STEP2(C, D, E, F, G, H, A, B, in, 14); \
+			SHA2_STEP2(B, C, D, E, F, G, H, A, in, 15); \
+		} \
+		(r)[0] = SPH_T32((r)[0] + A); /* add the working variables back into the state, truncated by SPH_T32 */ \
+		(r)[1] = SPH_T32((r)[1] + B); \
+		(r)[2] = SPH_T32((r)[2] + C); \
+		(r)[3] = SPH_T32((r)[3] + D); \
+		(r)[4] = SPH_T32((r)[4] + E); \
+		(r)[5] = SPH_T32((r)[5] + F); \
+		(r)[6] = SPH_T32((r)[6] + G); \
+		(r)[7] = SPH_T32((r)[7] + H); \
+	} while (0)
+
+#else
+
+#define SHA2_ROUND_BODY(in, r)   do { /* unrolled variant: all 64 steps written out, step constants inlined */ \
+		sph_u32 A, B, C, D, E, F, G, H, T1, T2; \
+		sph_u32 W00, W01, W02, W03, W04, W05, W06, W07; \
+		sph_u32 W08, W09, W10, W11, W12, W13, W14, W15; \
+ \
+		/* working variables start from the current state words r[0..7] */ \
+		A = (r)[0]; \
+		B = (r)[1]; \
+		C = (r)[2]; \
+		D = (r)[3]; \
+		E = (r)[4]; \
+		F = (r)[5]; \
+		G = (r)[6]; \
+		H = (r)[7]; \
+		W00 = in(0); /* first 16 steps: message words come straight from in() */ \
+		T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \
+			+ SPH_C32(0x428A2F98) + W00); \
+		T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \
+		D = SPH_T32(D + T1); \
+		H = SPH_T32(T1 + T2); \
+		W01 = in(1); \
+		T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \
+			+ SPH_C32(0x71374491) + W01); \
+		T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \
+		C = SPH_T32(C + T1); \
+		G = SPH_T32(T1 + T2); \
+		W02 = in(2); \
+		T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \
+			+ SPH_C32(0xB5C0FBCF) + W02); \
+		T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \
+		B = SPH_T32(B + T1); \
+		F = SPH_T32(T1 + T2); \
+		W03 = in(3); \
+		T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \
+			+ SPH_C32(0xE9B5DBA5) + W03); \
+		T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \
+		A = SPH_T32(A + T1); \
+		E = SPH_T32(T1 + T2); \
+		W04 = in(4); \
+		T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \
+			+ SPH_C32(0x3956C25B) + W04); \
+		T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \
+		H = SPH_T32(H + T1); \
+		D = SPH_T32(T1 + T2); \
+		W05 = in(5); \
+		T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \
+			+ SPH_C32(0x59F111F1) + W05); \
+		T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \
+		G = SPH_T32(G + T1); \
+		C = SPH_T32(T1 + T2); \
+		W06 = in(6); \
+		T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \
+			+ SPH_C32(0x923F82A4) + W06); \
+		T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \
+		F = SPH_T32(F + T1); \
+		B = SPH_T32(T1 + T2); \
+		W07 = in(7); \
+		T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \
+			+ SPH_C32(0xAB1C5ED5) + W07); \
+		T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \
+		E = SPH_T32(E + T1); \
+		A = SPH_T32(T1 + T2); \
+		W08 = in(8); \
+		T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \
+			+ SPH_C32(0xD807AA98) + W08); \
+		T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \
+		D = SPH_T32(D + T1); \
+		H = SPH_T32(T1 + T2); \
+		W09 = in(9); \
+		T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \
+			+ SPH_C32(0x12835B01) + W09); \
+		T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \
+		C = SPH_T32(C + T1); \
+		G = SPH_T32(T1 + T2); \
+		W10 = in(10); \
+		T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \
+			+ SPH_C32(0x243185BE) + W10); \
+		T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \
+		B = SPH_T32(B + T1); \
+		F = SPH_T32(T1 + T2); \
+		W11 = in(11); \
+		T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \
+			+ SPH_C32(0x550C7DC3) + W11); \
+		T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \
+		A = SPH_T32(A + T1); \
+		E = SPH_T32(T1 + T2); \
+		W12 = in(12); \
+		T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \
+			+ SPH_C32(0x72BE5D74) + W12); \
+		T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \
+		H = SPH_T32(H + T1); \
+		D = SPH_T32(T1 + T2); \
+		W13 = in(13); \
+		T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \
+			+ SPH_C32(0x80DEB1FE) + W13); \
+		T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \
+		G = SPH_T32(G + T1); \
+		C = SPH_T32(T1 + T2); \
+		W14 = in(14); \
+		T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \
+			+ SPH_C32(0x9BDC06A7) + W14); \
+		T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \
+		F = SPH_T32(F + T1); \
+		B = SPH_T32(T1 + T2); \
+		W15 = in(15); \
+		T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \
+			+ SPH_C32(0xC19BF174) + W15); \
+		T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \
+		E = SPH_T32(E + T1); \
+		A = SPH_T32(T1 + T2); \
+		W00 = SPH_T32(SSG2_1(W14) + W09 + SSG2_0(W01) + W00); /* from here each Wnn is recomputed in place from the other W words */ \
+		T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \
+			+ SPH_C32(0xE49B69C1) + W00); \
+		T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \
+		D = SPH_T32(D + T1); \
+		H = SPH_T32(T1 + T2); \
+		W01 = SPH_T32(SSG2_1(W15) + W10 + SSG2_0(W02) + W01); \
+		T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \
+			+ SPH_C32(0xEFBE4786) + W01); \
+		T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \
+		C = SPH_T32(C + T1); \
+		G = SPH_T32(T1 + T2); \
+		W02 = SPH_T32(SSG2_1(W00) + W11 + SSG2_0(W03) + W02); \
+		T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \
+			+ SPH_C32(0x0FC19DC6) + W02); \
+		T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \
+		B = SPH_T32(B + T1); \
+		F = SPH_T32(T1 + T2); \
+		W03 = SPH_T32(SSG2_1(W01) + W12 + SSG2_0(W04) + W03); \
+		T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \
+			+ SPH_C32(0x240CA1CC) + W03); \
+		T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \
+		A = SPH_T32(A + T1); \
+		E = SPH_T32(T1 + T2); \
+		W04 = SPH_T32(SSG2_1(W02) + W13 + SSG2_0(W05) + W04); \
+		T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \
+			+ SPH_C32(0x2DE92C6F) + W04); \
+		T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \
+		H = SPH_T32(H + T1); \
+		D = SPH_T32(T1 + T2); \
+		W05 = SPH_T32(SSG2_1(W03) + W14 + SSG2_0(W06) + W05); \
+		T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \
+			+ SPH_C32(0x4A7484AA) + W05); \
+		T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \
+		G = SPH_T32(G + T1); \
+		C = SPH_T32(T1 + T2); \
+		W06 = SPH_T32(SSG2_1(W04) + W15 + SSG2_0(W07) + W06); \
+		T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \
+			+ SPH_C32(0x5CB0A9DC) + W06); \
+		T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \
+		F = SPH_T32(F + T1); \
+		B = SPH_T32(T1 + T2); \
+		W07 = SPH_T32(SSG2_1(W05) + W00 + SSG2_0(W08) + W07); \
+		T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \
+			+ SPH_C32(0x76F988DA) + W07); \
+		T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \
+		E = SPH_T32(E + T1); \
+		A = SPH_T32(T1 + T2); \
+		W08 = SPH_T32(SSG2_1(W06) + W01 + SSG2_0(W09) + W08); \
+		T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \
+			+ SPH_C32(0x983E5152) + W08); \
+		T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \
+		D = SPH_T32(D + T1); \
+		H = SPH_T32(T1 + T2); \
+		W09 = SPH_T32(SSG2_1(W07) + W02 + SSG2_0(W10) + W09); \
+		T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \
+			+ SPH_C32(0xA831C66D) + W09); \
+		T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \
+		C = SPH_T32(C + T1); \
+		G = SPH_T32(T1 + T2); \
+		W10 = SPH_T32(SSG2_1(W08) + W03 + SSG2_0(W11) + W10); \
+		T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \
+			+ SPH_C32(0xB00327C8) + W10); \
+		T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \
+		B = SPH_T32(B + T1); \
+		F = SPH_T32(T1 + T2); \
+		W11 = SPH_T32(SSG2_1(W09) + W04 + SSG2_0(W12) + W11); \
+		T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \
+			+ SPH_C32(0xBF597FC7) + W11); \
+		T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \
+		A = SPH_T32(A + T1); \
+		E = SPH_T32(T1 + T2); \
+		W12 = SPH_T32(SSG2_1(W10) + W05 + SSG2_0(W13) + W12); \
+		T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \
+			+ SPH_C32(0xC6E00BF3) + W12); \
+		T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \
+		H = SPH_T32(H + T1); \
+		D = SPH_T32(T1 + T2); \
+		W13 = SPH_T32(SSG2_1(W11) + W06 + SSG2_0(W14) + W13); \
+		T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \
+			+ SPH_C32(0xD5A79147) + W13); \
+		T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \
+		G = SPH_T32(G + T1); \
+		C = SPH_T32(T1 + T2); \
+		W14 = SPH_T32(SSG2_1(W12) + W07 + SSG2_0(W15) + W14); \
+		T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \
+			+ SPH_C32(0x06CA6351) + W14); \
+		T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \
+		F = SPH_T32(F + T1); \
+		B = SPH_T32(T1 + T2); \
+		W15 = SPH_T32(SSG2_1(W13) + W08 + SSG2_0(W00) + W15); \
+		T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \
+			+ SPH_C32(0x14292967) + W15); \
+		T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \
+		E = SPH_T32(E + T1); \
+		A = SPH_T32(T1 + T2); \
+		W00 = SPH_T32(SSG2_1(W14) + W09 + SSG2_0(W01) + W00); \
+		T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \
+			+ SPH_C32(0x27B70A85) + W00); \
+		T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \
+		D = SPH_T32(D + T1); \
+		H = SPH_T32(T1 + T2); \
+		W01 = SPH_T32(SSG2_1(W15) + W10 + SSG2_0(W02) + W01); \
+		T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \
+			+ SPH_C32(0x2E1B2138) + W01); \
+		T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \
+		C = SPH_T32(C + T1); \
+		G = SPH_T32(T1 + T2); \
+		W02 = SPH_T32(SSG2_1(W00) + W11 + SSG2_0(W03) + W02); \
+		T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \
+			+ SPH_C32(0x4D2C6DFC) + W02); \
+		T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \
+		B = SPH_T32(B + T1); \
+		F = SPH_T32(T1 + T2); \
+		W03 = SPH_T32(SSG2_1(W01) + W12 + SSG2_0(W04) + W03); \
+		T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \
+			+ SPH_C32(0x53380D13) + W03); \
+		T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \
+		A = SPH_T32(A + T1); \
+		E = SPH_T32(T1 + T2); \
+		W04 = SPH_T32(SSG2_1(W02) + W13 + SSG2_0(W05) + W04); \
+		T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \
+			+ SPH_C32(0x650A7354) + W04); \
+		T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \
+		H = SPH_T32(H + T1); \
+		D = SPH_T32(T1 + T2); \
+		W05 = SPH_T32(SSG2_1(W03) + W14 + SSG2_0(W06) + W05); \
+		T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \
+			+ SPH_C32(0x766A0ABB) + W05); \
+		T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \
+		G = SPH_T32(G + T1); \
+		C = SPH_T32(T1 + T2); \
+		W06 = SPH_T32(SSG2_1(W04) + W15 + SSG2_0(W07) + W06); \
+		T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \
+			+ SPH_C32(0x81C2C92E) + W06); \
+		T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \
+		F = SPH_T32(F + T1); \
+		B = SPH_T32(T1 + T2); \
+		W07 = SPH_T32(SSG2_1(W05) + W00 + SSG2_0(W08) + W07); \
+		T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \
+			+ SPH_C32(0x92722C85) + W07); \
+		T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \
+		E = SPH_T32(E + T1); \
+		A = SPH_T32(T1 + T2); \
+		W08 = SPH_T32(SSG2_1(W06) + W01 + SSG2_0(W09) + W08); \
+		T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \
+			+ SPH_C32(0xA2BFE8A1) + W08); \
+		T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \
+		D = SPH_T32(D + T1); \
+		H = SPH_T32(T1 + T2); \
+		W09 = SPH_T32(SSG2_1(W07) + W02 + SSG2_0(W10) + W09); \
+		T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \
+			+ SPH_C32(0xA81A664B) + W09); \
+		T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \
+		C = SPH_T32(C + T1); \
+		G = SPH_T32(T1 + T2); \
+		W10 = SPH_T32(SSG2_1(W08) + W03 + SSG2_0(W11) + W10); \
+		T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \
+			+ SPH_C32(0xC24B8B70) + W10); \
+		T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \
+		B = SPH_T32(B + T1); \
+		F = SPH_T32(T1 + T2); \
+		W11 = SPH_T32(SSG2_1(W09) + W04 + SSG2_0(W12) + W11); \
+		T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \
+			+ SPH_C32(0xC76C51A3) + W11); \
+		T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \
+		A = SPH_T32(A + T1); \
+		E = SPH_T32(T1 + T2); \
+		W12 = SPH_T32(SSG2_1(W10) + W05 + SSG2_0(W13) + W12); \
+		T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \
+			+ SPH_C32(0xD192E819) + W12); \
+		T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \
+		H = SPH_T32(H + T1); \
+		D = SPH_T32(T1 + T2); \
+		W13 = SPH_T32(SSG2_1(W11) + W06 + SSG2_0(W14) + W13); \
+		T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \
+			+ SPH_C32(0xD6990624) + W13); \
+		T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \
+		G = SPH_T32(G + T1); \
+		C = SPH_T32(T1 + T2); \
+		W14 = SPH_T32(SSG2_1(W12) + W07 + SSG2_0(W15) + W14); \
+		T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \
+			+ SPH_C32(0xF40E3585) + W14); \
+		T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \
+		F = SPH_T32(F + T1); \
+		B = SPH_T32(T1 + T2); \
+		W15 = SPH_T32(SSG2_1(W13) + W08 + SSG2_0(W00) + W15); \
+		T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \
+			+ SPH_C32(0x106AA070) + W15); \
+		T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \
+		E = SPH_T32(E + T1); \
+		A = SPH_T32(T1 + T2); \
+		W00 = SPH_T32(SSG2_1(W14) + W09 + SSG2_0(W01) + W00); \
+		T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \
+			+ SPH_C32(0x19A4C116) + W00); \
+		T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \
+		D = SPH_T32(D + T1); \
+		H = SPH_T32(T1 + T2); \
+		W01 = SPH_T32(SSG2_1(W15) + W10 + SSG2_0(W02) + W01); \
+		T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \
+			+ SPH_C32(0x1E376C08) + W01); \
+		T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \
+		C = SPH_T32(C + T1); \
+		G = SPH_T32(T1 + T2); \
+		W02 = SPH_T32(SSG2_1(W00) + W11 + SSG2_0(W03) + W02); \
+		T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \
+			+ SPH_C32(0x2748774C) + W02); \
+		T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \
+		B = SPH_T32(B + T1); \
+		F = SPH_T32(T1 + T2); \
+		W03 = SPH_T32(SSG2_1(W01) + W12 + SSG2_0(W04) + W03); \
+		T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \
+			+ SPH_C32(0x34B0BCB5) + W03); \
+		T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \
+		A = SPH_T32(A + T1); \
+		E = SPH_T32(T1 + T2); \
+		W04 = SPH_T32(SSG2_1(W02) + W13 + SSG2_0(W05) + W04); \
+		T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \
+			+ SPH_C32(0x391C0CB3) + W04); \
+		T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \
+		H = SPH_T32(H + T1); \
+		D = SPH_T32(T1 + T2); \
+		W05 = SPH_T32(SSG2_1(W03) + W14 + SSG2_0(W06) + W05); \
+		T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \
+			+ SPH_C32(0x4ED8AA4A) + W05); \
+		T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \
+		G = SPH_T32(G + T1); \
+		C = SPH_T32(T1 + T2); \
+		W06 = SPH_T32(SSG2_1(W04) + W15 + SSG2_0(W07) + W06); \
+		T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \
+			+ SPH_C32(0x5B9CCA4F) + W06); \
+		T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \
+		F = SPH_T32(F + T1); \
+		B = SPH_T32(T1 + T2); \
+		W07 = SPH_T32(SSG2_1(W05) + W00 + SSG2_0(W08) + W07); \
+		T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \
+			+ SPH_C32(0x682E6FF3) + W07); \
+		T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \
+		E = SPH_T32(E + T1); \
+		A = SPH_T32(T1 + T2); \
+		W08 = SPH_T32(SSG2_1(W06) + W01 + SSG2_0(W09) + W08); \
+		T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \
+			+ SPH_C32(0x748F82EE) + W08); \
+		T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \
+		D = SPH_T32(D + T1); \
+		H = SPH_T32(T1 + T2); \
+		W09 = SPH_T32(SSG2_1(W07) + W02 + SSG2_0(W10) + W09); \
+		T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \
+			+ SPH_C32(0x78A5636F) + W09); \
+		T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \
+		C = SPH_T32(C + T1); \
+		G = SPH_T32(T1 + T2); \
+		W10 = SPH_T32(SSG2_1(W08) + W03 + SSG2_0(W11) + W10); \
+		T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \
+			+ SPH_C32(0x84C87814) + W10); \
+		T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \
+		B = SPH_T32(B + T1); \
+		F = SPH_T32(T1 + T2); \
+		W11 = SPH_T32(SSG2_1(W09) + W04 + SSG2_0(W12) + W11); \
+		T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \
+			+ SPH_C32(0x8CC70208) + W11); \
+		T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \
+		A = SPH_T32(A + T1); \
+		E = SPH_T32(T1 + T2); \
+		W12 = SPH_T32(SSG2_1(W10) + W05 + SSG2_0(W13) + W12); \
+		T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \
+			+ SPH_C32(0x90BEFFFA) + W12); \
+		T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \
+		H = SPH_T32(H + T1); \
+		D = SPH_T32(T1 + T2); \
+		W13 = SPH_T32(SSG2_1(W11) + W06 + SSG2_0(W14) + W13); \
+		T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \
+			+ SPH_C32(0xA4506CEB) + W13); \
+		T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \
+		G = SPH_T32(G + T1); \
+		C = SPH_T32(T1 + T2); \
+		W14 = SPH_T32(SSG2_1(W12) + W07 + SSG2_0(W15) + W14); \
+		T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \
+			+ SPH_C32(0xBEF9A3F7) + W14); \
+		T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \
+		F = SPH_T32(F + T1); \
+		B = SPH_T32(T1 + T2); \
+		W15 = SPH_T32(SSG2_1(W13) + W08 + SSG2_0(W00) + W15); \
+		T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \
+			+ SPH_C32(0xC67178F2) + W15); \
+		T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \
+		E = SPH_T32(E + T1); \
+		A = SPH_T32(T1 + T2); \
+		(r)[0] = SPH_T32((r)[0] + A); /* add the working variables back into the state, truncated by SPH_T32 */ \
+		(r)[1] = SPH_T32((r)[1] + B); \
+		(r)[2] = SPH_T32((r)[2] + C); \
+		(r)[3] = SPH_T32((r)[3] + D); \
+		(r)[4] = SPH_T32((r)[4] + E); \
+		(r)[5] = SPH_T32((r)[5] + F); \
+		(r)[6] = SPH_T32((r)[6] + G); \
+		(r)[7] = SPH_T32((r)[7] + H); \
+	} while (0)
+
+#endif
+
+/*
+ * Compress one 64-byte block at data (aligned for 32-bit access) into r[0..7].
+ */
+static void
+sha2_round(const unsigned char *data, sph_u32 r[8])
+{
+#define SHA2_BE_WORD(idx)   sph_dec32be_aligned(data + 4 * (idx))
+	SHA2_ROUND_BODY(SHA2_BE_WORD, r);
+#undef SHA2_BE_WORD
+}
+
+/* see sph_sha2.h -- copies H224 into the state and zeroes the count field(s) */
+void
+sph_sha224_init(void *cc)
+{
+	sph_sha224_context *ctx = cc;
+
+	memcpy(ctx->val, H224, sizeof H224);
+#if SPH_64
+	ctx->count = 0;
+#else
+	ctx->count_low = 0;
+	ctx->count_high = 0;
+#endif
+}
+
+/* see sph_sha2.h -- copies H256 into the state and zeroes the count field(s) */
+void
+sph_sha256_init(void *cc)
+{
+	sph_sha256_context *ctx = cc;
+
+	memcpy(ctx->val, H256, sizeof H256);
+#if SPH_64
+	ctx->count = 0;
+#else
+	ctx->count_low = 0;
+	ctx->count_high = 0;
+#endif
+}
+
+#define RFUN   sha2_round   /* settings consumed by the md_helper.c include below; presumably the block function */
+#define HASH   sha224       /* presumably the name stem: sha224_close() / sha224_addbits_and_close() are used below */
+#define BE32   1            /* presumably selects big-endian 32-bit handling -- confirm in md_helper.c */
+#include "md_helper.c"
+
+/* see sph_sha2.h -- sha224_close() is not defined in this file; presumably generated by md_helper.c */
+void
+sph_sha224_close(void *cc, void *dst)
+{
+	sha224_close(cc, dst, 7);   /* 7: presumably the number of 32-bit words written (28 bytes) -- confirm */
+	sph_sha224_init(cc);        /* re-arm the context for a new SHA-224 computation */
+}
+
+/* see sph_sha2.h -- forwards ub/n unchanged, then re-initializes the context for SHA-224 */
+void
+sph_sha224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+	sha224_addbits_and_close(cc, ub, n, dst, 7);   /* 7: presumably the output size in 32-bit words -- confirm */
+	sph_sha224_init(cc);
+}
+
+/* see sph_sha2.h -- deliberately calls the sha224_ helper: only the final argument and the re-init differ */
+void
+sph_sha256_close(void *cc, void *dst)
+{
+	sha224_close(cc, dst, 8);   /* 8: presumably the number of 32-bit words written (32 bytes) -- confirm */
+	sph_sha256_init(cc);        /* re-arm the context for a new SHA-256 computation */
+}
+
+/* see sph_sha2.h -- same sha224_ helper as the SHA-224 variant, with 8 instead of 7 and a SHA-256 re-init */
+void
+sph_sha256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+	sha224_addbits_and_close(cc, ub, n, dst, 8);   /* 8: presumably the output size in 32-bit words -- confirm */
+	sph_sha256_init(cc);
+}
+
+/* see sph_sha2.h -- msg[] holds already-decoded words, so they are used as-is */
+void
+sph_sha224_comp(const sph_u32 msg[16], sph_u32 val[8])
+{
+#define SHA2_MSG_WORD(idx)   msg[idx]
+	SHA2_ROUND_BODY(SHA2_MSG_WORD, val);
+#undef SHA2_MSG_WORD
+}
diff --git a/sph/sph_sha2.h b/sph/sph_sha2.h
new file mode 100644
index 0000000000..c47b0f3698
--- /dev/null
+++ b/sph/sph_sha2.h
@@ -0,0 +1,378 @@
+/* $Id: sph_sha2.h 216 2010-06-08 09:46:57Z tp $ */
+/**
+ * SHA-224, SHA-256, SHA-384 and SHA-512 interface.
+ *
+ * SHA-256 has been published in FIPS 180-2, now amended with a change
+ * notice to include SHA-224 as well (which is a simple variation on
+ * SHA-256). SHA-384 and SHA-512 are also defined in FIPS 180-2. FIPS
+ * standards can be found at:
+ *    http://csrc.nist.gov/publications/fips/
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @file     sph_sha2.h
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#ifndef SPH_SHA2_H__
+#define SPH_SHA2_H__
+
+#include <stddef.h>
+#include "sph_types.h"
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+/**
+ * Output size (in bits) for SHA-224.
+ */
+#define SPH_SIZE_sha224   224
+
+/**
+ * Output size (in bits) for SHA-256.
+ */
+#define SPH_SIZE_sha256   256
+
+/**
+ * This structure is a context for SHA-224 computations: it contains the
+ * intermediate values and some data from the last entered block. Once
+ * a SHA-224 computation has been performed, the context can be reused for
+ * another computation.
+ *
+ * The contents of this structure are private. A running SHA-224 computation
+ * can be cloned by copying the context (e.g. with a simple
+ * <code>memcpy()</code>).
+ */
+typedef struct {
+#ifndef DOXYGEN_IGNORE
+	unsigned char buf[64];    /* first field, for alignment; presumably the pending partial block -- confirm in md_helper.c */
+	sph_u32 val[8];           /* eight 32-bit state words; seeded by sph_sha224_init() or sph_sha256_init() */
+#if SPH_64
+	sph_u64 count;            /* zeroed by the init functions; presumably the input length so far -- confirm */
+#else
+	sph_u32 count_high, count_low;   /* two 32-bit words used instead of count when SPH_64 is not set */
+#endif
+#endif
+} sph_sha224_context;
+
+/**
+ * This structure is a context for SHA-256 computations. It is identical
+ * to the SHA-224 context. However, a context is initialized for SHA-224
+ * <strong>or</strong> SHA-256, but not both (the internal IV is not the
+ * same).
+ */
+typedef sph_sha224_context sph_sha256_context;
+
+/**
+ * Initialize a SHA-224 context. This process performs no memory allocation.
+ *
+ * @param cc   the SHA-224 context (pointer to
+ *             a <code>sph_sha224_context</code>)
+ */
+void sph_sha224_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the SHA-224 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_sha224(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current SHA-224 computation and output the result into the
+ * provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (28 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the SHA-224 context
+ * @param dst   the destination buffer
+ */
+void sph_sha224_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (28 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the SHA-224 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_sha224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst);
+
+/**
+ * Apply the SHA-224 compression function on the provided data. The
+ * <code>msg</code> parameter contains the 16 32-bit input blocks,
+ * as numerical values (hence after the big-endian decoding). The
+ * <code>val</code> parameter contains the 8 32-bit input blocks for
+ * the compression function; the output is written in place in this
+ * array.
+ *
+ * @param msg   the message block (16 values)
+ * @param val   the function 256-bit input and output
+ */
+void sph_sha224_comp(const sph_u32 msg[16], sph_u32 val[8]);
+
+/**
+ * Initialize a SHA-256 context. This process performs no memory allocation.
+ *
+ * @param cc   the SHA-256 context (pointer to
+ *             a <code>sph_sha256_context</code>)
+ */
+void sph_sha256_init(void *cc);
+
+#ifdef DOXYGEN_IGNORE
+/**
+ * Process some data bytes, for SHA-256. This function is identical to
+ * <code>sph_sha224()</code>.
+ *
+ * @param cc     the SHA-256 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_sha256(void *cc, const void *data, size_t len);
+#endif
+
+#ifndef DOXYGEN_IGNORE
+#define sph_sha256   sph_sha224
+#endif
+
+/**
+ * Terminate the current SHA-256 computation and output the result into the
+ * provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (32 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the SHA-256 context
+ * @param dst   the destination buffer
+ */
+void sph_sha256_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (32 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the SHA-256 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_sha256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst);
+
+#ifdef DOXYGEN_IGNORE
+/**
+ * Apply the SHA-256 compression function on the provided data. This
+ * function is identical to <code>sph_sha224_comp()</code>.
+ *
+ * @param msg   the message block (16 values)
+ * @param val   the function 256-bit input and output
+ */
+void sph_sha256_comp(const sph_u32 msg[16], sph_u32 val[8]);
+#endif
+
+#ifndef DOXYGEN_IGNORE
+#define sph_sha256_comp   sph_sha224_comp
+#endif
+
+#if SPH_64
+
+/**
+ * Output size (in bits) for SHA-384.
+ */
+#define SPH_SIZE_sha384   384
+
+/**
+ * Output size (in bits) for SHA-512.
+ */
+#define SPH_SIZE_sha512   512
+
+/**
+ * This structure is a context for SHA-384 computations: it contains the
+ * intermediate values and some data from the last entered block. Once
+ * a SHA-384 computation has been performed, the context can be reused for
+ * another computation.
+ *
+ * The contents of this structure are private. A running SHA-384 computation
+ * can be cloned by copying the context (e.g. with a simple
+ * <code>memcpy()</code>).
+ */
+typedef struct {
+#ifndef DOXYGEN_IGNORE
+	unsigned char buf[128];    /* first field, for alignment */
+	sph_u64 val[8];    /* presumably the chaining value, cf. val[8] of sph_sha384_comp() -- TODO confirm */
+	sph_u64 count;    /* presumably the running input length counter -- TODO confirm against the implementation */
+#endif
+} sph_sha384_context;
+
+/**
+ * Initialize a SHA-384 context. This process performs no memory allocation.
+ *
+ * @param cc   the SHA-384 context (pointer to
+ *             a <code>sph_sha384_context</code>)
+ */
+void sph_sha384_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the SHA-384 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_sha384(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current SHA-384 computation and output the result into the
+ * provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (48 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the SHA-384 context
+ * @param dst   the destination buffer
+ */
+void sph_sha384_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (48 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the SHA-384 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_sha384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst);
+
+/**
+ * Apply the SHA-384 compression function on the provided data. The
+ * <code>msg</code> parameter contains the 16 64-bit input blocks,
+ * as numerical values (hence after the big-endian decoding). The
+ * <code>val</code> parameter contains the 8 64-bit input blocks for
+ * the compression function; the output is written in place in this
+ * array.
+ *
+ * @param msg   the message block (16 values)
+ * @param val   the function 512-bit input and output
+ */
+void sph_sha384_comp(const sph_u64 msg[16], sph_u64 val[8]);
+
+/**
+ * This structure is a context for SHA-512 computations. It is identical
+ * to the SHA-384 context. However, a context is initialized for SHA-384
+ * <strong>or</strong> SHA-512, but not both (the internal IV is not the
+ * same).
+ */
+typedef sph_sha384_context sph_sha512_context;
+
+/**
+ * Initialize a SHA-512 context. This process performs no memory allocation.
+ *
+ * @param cc   the SHA-512 context (pointer to
+ *             a <code>sph_sha512_context</code>)
+ */
+void sph_sha512_init(void *cc);
+
+#ifdef DOXYGEN_IGNORE
+/**
+ * Process some data bytes, for SHA-512. This function is identical to
+ * <code>sph_sha384()</code>.
+ *
+ * @param cc     the SHA-512 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_sha512(void *cc, const void *data, size_t len);
+#endif
+
+#ifndef DOXYGEN_IGNORE
+#define sph_sha512   sph_sha384
+#endif
+
+/**
+ * Terminate the current SHA-512 computation and output the result into the
+ * provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (64 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the SHA-512 context
+ * @param dst   the destination buffer
+ */
+void sph_sha512_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (64 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the SHA-512 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_sha512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst);
+
+#ifdef DOXYGEN_IGNORE
+/**
+ * Apply the SHA-512 compression function. This function is identical to
+ * <code>sph_sha384_comp()</code>.
+ *
+ * @param msg   the message block (16 values)
+ * @param val   the function 512-bit input and output
+ */
+void sph_sha512_comp(const sph_u64 msg[16], sph_u64 val[8]);
+#endif
+
+#ifndef DOXYGEN_IGNORE
+#define sph_sha512_comp   sph_sha384_comp
+#endif
+
+#endif
+
+#endif
+#ifdef __cplusplus
+}
+#endif
+
diff --git a/sph/sph_shabal.h b/sph/sph_shabal.h
new file mode 100644
index 0000000000..4c96047742
--- /dev/null
+++ b/sph/sph_shabal.h
@@ -0,0 +1,336 @@
+/* $Id: sph_shabal.h 175 2010-05-07 16:03:20Z tp $ */
+/**
+ * Shabal interface. Shabal is a family of functions which differ by
+ * their output size; this implementation defines Shabal for output
+ * sizes 192, 224, 256, 384 and 512 bits.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @file     sph_shabal.h
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#ifndef SPH_SHABAL_H__
+#define SPH_SHABAL_H__
+
+#include <stddef.h>
+#include "sph_types.h"
+
+/**
+ * Output size (in bits) for Shabal-192.
+ */
+#define SPH_SIZE_shabal192   192
+
+/**
+ * Output size (in bits) for Shabal-224.
+ */
+#define SPH_SIZE_shabal224   224
+
+/**
+ * Output size (in bits) for Shabal-256.
+ */
+#define SPH_SIZE_shabal256   256
+
+/**
+ * Output size (in bits) for Shabal-384.
+ */
+#define SPH_SIZE_shabal384   384
+
+/**
+ * Output size (in bits) for Shabal-512.
+ */
+#define SPH_SIZE_shabal512   512
+
+/**
+ * This structure is a context for Shabal computations: it contains the
+ * intermediate values and some data from the last entered block. Once
+ * a Shabal computation has been performed, the context can be reused for
+ * another computation.
+ *
+ * The contents of this structure are private. A running Shabal computation
+ * can be cloned by copying the context (e.g. with a simple
+ * <code>memcpy()</code>).
+ */
+typedef struct {
+#ifndef DOXYGEN_IGNORE
+	unsigned char buf[64];    /* first field, for alignment */
+	size_t ptr;    /* presumably the current fill level of buf -- TODO confirm against the implementation */
+	sph_u32 A[12], B[16], C[16];    /* presumably the Shabal A/B/C state words -- TODO confirm */
+	sph_u32 Whigh, Wlow;    /* presumably one counter split into two 32-bit halves -- TODO confirm */
+#endif
+} sph_shabal_context;
+
+/**
+ * Type for a Shabal-192 context (identical to the common context).
+ */
+typedef sph_shabal_context sph_shabal192_context;
+
+/**
+ * Type for a Shabal-224 context (identical to the common context).
+ */
+typedef sph_shabal_context sph_shabal224_context;
+
+/**
+ * Type for a Shabal-256 context (identical to the common context).
+ */
+typedef sph_shabal_context sph_shabal256_context;
+
+/**
+ * Type for a Shabal-384 context (identical to the common context).
+ */
+typedef sph_shabal_context sph_shabal384_context;
+
+/**
+ * Type for a Shabal-512 context (identical to the common context).
+ */
+typedef sph_shabal_context sph_shabal512_context;
+
+/**
+ * Initialize a Shabal-192 context. This process performs no memory allocation.
+ *
+ * @param cc   the Shabal-192 context (pointer to a
+ *             <code>sph_shabal192_context</code>)
+ */
+void sph_shabal192_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the Shabal-192 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_shabal192(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current Shabal-192 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (24 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the Shabal-192 context
+ * @param dst   the destination buffer
+ */
+void sph_shabal192_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (24 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the Shabal-192 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_shabal192_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize a Shabal-224 context. This process performs no memory allocation.
+ *
+ * @param cc   the Shabal-224 context (pointer to a
+ *             <code>sph_shabal224_context</code>)
+ */
+void sph_shabal224_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the Shabal-224 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_shabal224(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current Shabal-224 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (28 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the Shabal-224 context
+ * @param dst   the destination buffer
+ */
+void sph_shabal224_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (28 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the Shabal-224 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_shabal224_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize a Shabal-256 context. This process performs no memory allocation.
+ *
+ * @param cc   the Shabal-256 context (pointer to a
+ *             <code>sph_shabal256_context</code>)
+ */
+void sph_shabal256_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the Shabal-256 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_shabal256(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current Shabal-256 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (32 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the Shabal-256 context
+ * @param dst   the destination buffer
+ */
+void sph_shabal256_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (32 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the Shabal-256 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_shabal256_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize a Shabal-384 context. This process performs no memory allocation.
+ *
+ * @param cc   the Shabal-384 context (pointer to a
+ *             <code>sph_shabal384_context</code>)
+ */
+void sph_shabal384_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the Shabal-384 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_shabal384(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current Shabal-384 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (48 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the Shabal-384 context
+ * @param dst   the destination buffer
+ */
+void sph_shabal384_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (48 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the Shabal-384 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_shabal384_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize a Shabal-512 context. This process performs no memory allocation.
+ *
+ * @param cc   the Shabal-512 context (pointer to a
+ *             <code>sph_shabal512_context</code>)
+ */
+void sph_shabal512_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the Shabal-512 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_shabal512(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current Shabal-512 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (64 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the Shabal-512 context
+ * @param dst   the destination buffer
+ */
+void sph_shabal512_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (64 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the Shabal-512 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_shabal512_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+#endif
diff --git a/sph/sph_tiger.h b/sph/sph_tiger.h
new file mode 100644
index 0000000000..9cf9fda077
--- /dev/null
+++ b/sph/sph_tiger.h
@@ -0,0 +1,191 @@
+/* $Id: sph_tiger.h 216 2010-06-08 09:46:57Z tp $ */
+/**
+ * Tiger / Tiger-2 interface.
+ *
+ * Tiger has been published in: R. Anderson, E. Biham, "Tiger: A Fast
+ * New Hash Function", Fast Software Encryption - FSE'96, LNCS 1039,
+ * Springer (1996), pp. 89--97.
+ *
+ * Tiger2 has never been formally published, but it was described as
+ * identical to Tiger, except for the padding which is the same in
+ * Tiger2 as it is in MD4. Fortunately, an implementation of Tiger2
+ * was submitted to NESSIE, which produced test vectors; the sphlib
+ * implementation of Tiger2 is compatible with the NESSIE test vectors.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @file     sph_tiger.h
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#ifndef SPH_TIGER_H__
+#define SPH_TIGER_H__
+
+#include <stddef.h>
+#include "sph_types.h"
+
+#if SPH_64
+
+/**
+ * Output size (in bits) for Tiger.
+ */
+#define SPH_SIZE_tiger   192
+
+/**
+ * Output size (in bits) for Tiger2.
+ */
+#define SPH_SIZE_tiger2   192
+
+/**
+ * This structure is a context for Tiger computations: it contains the
+ * intermediate values and some data from the last entered block. Once
+ * a Tiger computation has been performed, the context can be reused for
+ * another computation.
+ *
+ * The contents of this structure are private. A running Tiger computation
+ * can be cloned by copying the context (e.g. with a simple
+ * <code>memcpy()</code>).
+ */
+typedef struct {
+#ifndef DOXYGEN_IGNORE
+	unsigned char buf[64];    /* first field, for alignment */
+	sph_u64 val[3];    /* presumably the chaining value, cf. val[3] of sph_tiger_comp() -- TODO confirm */
+	sph_u64 count;    /* presumably the running input length counter -- TODO confirm against tiger.c */
+#endif
+} sph_tiger_context;
+
+/**
+ * Initialize a Tiger context. This process performs no memory allocation.
+ *
+ * @param cc   the Tiger context (pointer to
+ *             a <code>sph_tiger_context</code>)
+ */
+void sph_tiger_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the Tiger context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_tiger(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current Tiger computation and output the result into the
+ * provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (24 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the Tiger context
+ * @param dst   the destination buffer
+ */
+void sph_tiger_close(void *cc, void *dst);
+
+/**
+ * Apply the Tiger compression function on the provided data. The
+ * <code>msg</code> parameter contains the 8 64-bit input blocks,
+ * as numerical values (hence after the little-endian decoding). The
+ * <code>val</code> parameter contains the 3 64-bit input blocks for
+ * the compression function; the output is written in place in this
+ * array.
+ *
+ * @param msg   the message block (8 values)
+ * @param val   the function 192-bit input and output
+ */
+void sph_tiger_comp(const sph_u64 msg[8], sph_u64 val[3]);
+
+/**
+ * This structure is a context for Tiger2 computations. It is identical
+ * to the Tiger context, and they may be freely exchanged, since the
+ * difference between Tiger and Tiger2 resides solely in the padding, which
+ * is computed only in the last computation step.
+ */
+typedef sph_tiger_context sph_tiger2_context;
+
+#ifdef DOXYGEN_IGNORE
+/**
+ * Initialize a Tiger2 context. This function is identical to
+ * <code>sph_tiger_init()</code>.
+ *
+ * @param cc   the Tiger2 context (pointer to
+ *             a <code>sph_tiger2_context</code>)
+ */
+void sph_tiger2_init(void *cc);
+#endif
+
+#ifndef DOXYGEN_IGNORE
+#define sph_tiger2_init   sph_tiger_init
+#endif
+
+#ifdef DOXYGEN_IGNORE
+/**
+ * Process some data bytes. This function is identical to
+ * <code>sph_tiger()</code>.
+ *
+ * @param cc     the Tiger2 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_tiger2(void *cc, const void *data, size_t len);
+#endif
+
+#ifndef DOXYGEN_IGNORE
+#define sph_tiger2   sph_tiger
+#endif
+
+/**
+ * Terminate the current Tiger2 computation and output the result into the
+ * provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (24 bytes). The context is automatically
+ * reinitialized. Note that this function is NOT identical to
+ * <code>sph_tiger_close()</code>: this is the exact and unique point
+ * where Tiger and Tiger2 differ.
+ *
+ * @param cc    the Tiger2 context
+ * @param dst   the destination buffer
+ */
+void sph_tiger2_close(void *cc, void *dst);
+
+#ifdef DOXYGEN_IGNORE
+/**
+ * Apply the Tiger2 compression function, which is identical to the Tiger
+ * compression function.
+ *
+ * @param msg   the message block (8 values)
+ * @param val   the function 192-bit input and output
+ */
+void sph_tiger2_comp(const sph_u64 msg[8], sph_u64 val[3]);
+#endif
+
+#ifndef DOXYGEN_IGNORE
+#define sph_tiger2_comp   sph_tiger_comp
+#endif
+
+#endif
+
+#endif
diff --git a/sph/sph_whirlpool.h b/sph/sph_whirlpool.h
new file mode 100644
index 0000000000..bc4c3d624b
--- /dev/null
+++ b/sph/sph_whirlpool.h
@@ -0,0 +1,209 @@
+/* $Id: sph_whirlpool.h 216 2010-06-08 09:46:57Z tp $ */
+/**
+ * WHIRLPOOL interface.
+ *
+ * WHIRLPOOL knows three variants, dubbed "WHIRLPOOL-0" (original
+ * version, published in 2000, studied by NESSIE), "WHIRLPOOL-1"
+ * (first revision, 2001, with a new S-box) and "WHIRLPOOL" (current
+ * version, 2003, with a new diffusion matrix, also described as "plain
+ * WHIRLPOOL"). All three variants are implemented here.
+ *
+ * The original WHIRLPOOL (i.e. WHIRLPOOL-0) was published in: P. S. L.
+ * M. Barreto, V. Rijmen, "The Whirlpool Hashing Function", First open
+ * NESSIE Workshop, Leuven, Belgium, November 13--14, 2000.
+ *
+ * The current WHIRLPOOL specification and a reference implementation
+ * can be found on the WHIRLPOOL web page:
+ * http://paginas.terra.com.br/informatica/paulobarreto/WhirlpoolPage.html
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @file     sph_whirlpool.h
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#ifndef SPH_WHIRLPOOL_H__
+#define SPH_WHIRLPOOL_H__
+
+#include <stddef.h>
+#include "sph_types.h"
+
+#if SPH_64
+
+/**
+ * Output size (in bits) for WHIRLPOOL.
+ */
+#define SPH_SIZE_whirlpool   512
+
+/**
+ * Output size (in bits) for WHIRLPOOL-0.
+ */
+#define SPH_SIZE_whirlpool0   512
+
+/**
+ * Output size (in bits) for WHIRLPOOL-1.
+ */
+#define SPH_SIZE_whirlpool1   512
+
+/**
+ * This structure is a context for WHIRLPOOL computations: it contains the
+ * intermediate values and some data from the last entered block. Once
+ * a WHIRLPOOL computation has been performed, the context can be reused for
+ * another computation.
+ *
+ * The contents of this structure are private. A running WHIRLPOOL computation
+ * can be cloned by copying the context (e.g. with a simple
+ * <code>memcpy()</code>).
+ */
+typedef struct {
+#ifndef DOXYGEN_IGNORE
+	unsigned char buf[64];    /* first field, for alignment */
+	sph_u64 state[8];    /* presumably the chaining state -- TODO confirm against the implementation */
+#if SPH_64
+	sph_u64 count;    /* presumably the running input length counter -- TODO confirm */
+#else
+	sph_u32 count_high, count_low;    /* never compiled: this struct already sits inside an outer #if SPH_64 */
+#endif
+#endif
+} sph_whirlpool_context;
+
+/**
+ * Initialize a WHIRLPOOL context. This process performs no memory allocation.
+ *
+ * @param cc   the WHIRLPOOL context (pointer to a
+ *             <code>sph_whirlpool_context</code>)
+ */
+void sph_whirlpool_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing). This function applies the
+ * plain WHIRLPOOL algorithm.
+ *
+ * @param cc     the WHIRLPOOL context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_whirlpool(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current WHIRLPOOL computation and output the result into the
+ * provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (64 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the WHIRLPOOL context
+ * @param dst   the destination buffer
+ */
+void sph_whirlpool_close(void *cc, void *dst);
+
+/**
+ * WHIRLPOOL-0 uses the same structure as plain WHIRLPOOL.
+ */
+typedef sph_whirlpool_context sph_whirlpool0_context;
+
+#ifdef DOXYGEN_IGNORE
+/**
+ * Initialize a WHIRLPOOL-0 context. This function is identical to
+ * <code>sph_whirlpool_init()</code>.
+ *
+ * @param cc   the WHIRLPOOL context (pointer to a
+ *             <code>sph_whirlpool0_context</code>)
+ */
+void sph_whirlpool0_init(void *cc);
+#endif
+
+#ifndef DOXYGEN_IGNORE
+#define sph_whirlpool0_init   sph_whirlpool_init
+#endif
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing). This function applies the
+ * WHIRLPOOL-0 algorithm.
+ *
+ * @param cc     the WHIRLPOOL context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_whirlpool0(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current WHIRLPOOL-0 computation and output the result into the
+ * provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (64 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the WHIRLPOOL-0 context
+ * @param dst   the destination buffer
+ */
+void sph_whirlpool0_close(void *cc, void *dst);
+
+/**
+ * WHIRLPOOL-1 uses the same structure as plain WHIRLPOOL.
+ */
+typedef sph_whirlpool_context sph_whirlpool1_context;
+
+#ifdef DOXYGEN_IGNORE
+/**
+ * Initialize a WHIRLPOOL-1 context. This function is identical to
+ * <code>sph_whirlpool_init()</code>.
+ *
+ * @param cc   the WHIRLPOOL context (pointer to a
+ *             <code>sph_whirlpool1_context</code>)
+ */
+void sph_whirlpool1_init(void *cc);
+#endif
+
+#ifndef DOXYGEN_IGNORE
+#define sph_whirlpool1_init   sph_whirlpool_init
+#endif
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing). This function applies the
+ * WHIRLPOOL-1 algorithm.
+ *
+ * @param cc     the WHIRLPOOL context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_whirlpool1(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current WHIRLPOOL-1 computation and output the result into the
+ * provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (64 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the WHIRLPOOL-1 context
+ * @param dst   the destination buffer
+ */
+void sph_whirlpool1_close(void *cc, void *dst);
+
+#endif
+
+#endif
diff --git a/sph/tiger.c b/sph/tiger.c
new file mode 100644
index 0000000000..7ab5d178ac
--- /dev/null
+++ b/sph/tiger.c
@@ -0,0 +1,698 @@
+/* $Id: tiger.c 216 2010-06-08 09:46:57Z tp $ */
+/*
+ * Tiger / Tiger2 implementation.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#include <stddef.h>
+#include <string.h>
+
+#include "sph_tiger.h"
+
+#if SPH_64
+
+static const sph_u64 T1[256] = { /* lookup table; ROUND indexes it with bytes 0 and 7 of c */
+	SPH_C64(0x02AAB17CF7E90C5E), SPH_C64(0xAC424B03E243A8EC),
+	SPH_C64(0x72CD5BE30DD5FCD3), SPH_C64(0x6D019B93F6F97F3A),
+	SPH_C64(0xCD9978FFD21F9193), SPH_C64(0x7573A1C9708029E2),
+	SPH_C64(0xB164326B922A83C3), SPH_C64(0x46883EEE04915870),
+	SPH_C64(0xEAACE3057103ECE6), SPH_C64(0xC54169B808A3535C),
+	SPH_C64(0x4CE754918DDEC47C), SPH_C64(0x0AA2F4DFDC0DF40C),
+	SPH_C64(0x10B76F18A74DBEFA), SPH_C64(0xC6CCB6235AD1AB6A),
+	SPH_C64(0x13726121572FE2FF), SPH_C64(0x1A488C6F199D921E),
+	SPH_C64(0x4BC9F9F4DA0007CA), SPH_C64(0x26F5E6F6E85241C7),
+	SPH_C64(0x859079DBEA5947B6), SPH_C64(0x4F1885C5C99E8C92),
+	SPH_C64(0xD78E761EA96F864B), SPH_C64(0x8E36428C52B5C17D),
+	SPH_C64(0x69CF6827373063C1), SPH_C64(0xB607C93D9BB4C56E),
+	SPH_C64(0x7D820E760E76B5EA), SPH_C64(0x645C9CC6F07FDC42),
+	SPH_C64(0xBF38A078243342E0), SPH_C64(0x5F6B343C9D2E7D04),
+	SPH_C64(0xF2C28AEB600B0EC6), SPH_C64(0x6C0ED85F7254BCAC),
+	SPH_C64(0x71592281A4DB4FE5), SPH_C64(0x1967FA69CE0FED9F),
+	SPH_C64(0xFD5293F8B96545DB), SPH_C64(0xC879E9D7F2A7600B),
+	SPH_C64(0x860248920193194E), SPH_C64(0xA4F9533B2D9CC0B3),
+	SPH_C64(0x9053836C15957613), SPH_C64(0xDB6DCF8AFC357BF1),
+	SPH_C64(0x18BEEA7A7A370F57), SPH_C64(0x037117CA50B99066),
+	SPH_C64(0x6AB30A9774424A35), SPH_C64(0xF4E92F02E325249B),
+	SPH_C64(0x7739DB07061CCAE1), SPH_C64(0xD8F3B49CECA42A05),
+	SPH_C64(0xBD56BE3F51382F73), SPH_C64(0x45FAED5843B0BB28),
+	SPH_C64(0x1C813D5C11BF1F83), SPH_C64(0x8AF0E4B6D75FA169),
+	SPH_C64(0x33EE18A487AD9999), SPH_C64(0x3C26E8EAB1C94410),
+	SPH_C64(0xB510102BC0A822F9), SPH_C64(0x141EEF310CE6123B),
+	SPH_C64(0xFC65B90059DDB154), SPH_C64(0xE0158640C5E0E607),
+	SPH_C64(0x884E079826C3A3CF), SPH_C64(0x930D0D9523C535FD),
+	SPH_C64(0x35638D754E9A2B00), SPH_C64(0x4085FCCF40469DD5),
+	SPH_C64(0xC4B17AD28BE23A4C), SPH_C64(0xCAB2F0FC6A3E6A2E),
+	SPH_C64(0x2860971A6B943FCD), SPH_C64(0x3DDE6EE212E30446),
+	SPH_C64(0x6222F32AE01765AE), SPH_C64(0x5D550BB5478308FE),
+	SPH_C64(0xA9EFA98DA0EDA22A), SPH_C64(0xC351A71686C40DA7),
+	SPH_C64(0x1105586D9C867C84), SPH_C64(0xDCFFEE85FDA22853),
+	SPH_C64(0xCCFBD0262C5EEF76), SPH_C64(0xBAF294CB8990D201),
+	SPH_C64(0xE69464F52AFAD975), SPH_C64(0x94B013AFDF133E14),
+	SPH_C64(0x06A7D1A32823C958), SPH_C64(0x6F95FE5130F61119),
+	SPH_C64(0xD92AB34E462C06C0), SPH_C64(0xED7BDE33887C71D2),
+	SPH_C64(0x79746D6E6518393E), SPH_C64(0x5BA419385D713329),
+	SPH_C64(0x7C1BA6B948A97564), SPH_C64(0x31987C197BFDAC67),
+	SPH_C64(0xDE6C23C44B053D02), SPH_C64(0x581C49FED002D64D),
+	SPH_C64(0xDD474D6338261571), SPH_C64(0xAA4546C3E473D062),
+	SPH_C64(0x928FCE349455F860), SPH_C64(0x48161BBACAAB94D9),
+	SPH_C64(0x63912430770E6F68), SPH_C64(0x6EC8A5E602C6641C),
+	SPH_C64(0x87282515337DDD2B), SPH_C64(0x2CDA6B42034B701B),
+	SPH_C64(0xB03D37C181CB096D), SPH_C64(0xE108438266C71C6F),
+	SPH_C64(0x2B3180C7EB51B255), SPH_C64(0xDF92B82F96C08BBC),
+	SPH_C64(0x5C68C8C0A632F3BA), SPH_C64(0x5504CC861C3D0556),
+	SPH_C64(0xABBFA4E55FB26B8F), SPH_C64(0x41848B0AB3BACEB4),
+	SPH_C64(0xB334A273AA445D32), SPH_C64(0xBCA696F0A85AD881),
+	SPH_C64(0x24F6EC65B528D56C), SPH_C64(0x0CE1512E90F4524A),
+	SPH_C64(0x4E9DD79D5506D35A), SPH_C64(0x258905FAC6CE9779),
+	SPH_C64(0x2019295B3E109B33), SPH_C64(0xF8A9478B73A054CC),
+	SPH_C64(0x2924F2F934417EB0), SPH_C64(0x3993357D536D1BC4),
+	SPH_C64(0x38A81AC21DB6FF8B), SPH_C64(0x47C4FBF17D6016BF),
+	SPH_C64(0x1E0FAADD7667E3F5), SPH_C64(0x7ABCFF62938BEB96),
+	SPH_C64(0xA78DAD948FC179C9), SPH_C64(0x8F1F98B72911E50D),
+	SPH_C64(0x61E48EAE27121A91), SPH_C64(0x4D62F7AD31859808),
+	SPH_C64(0xECEBA345EF5CEAEB), SPH_C64(0xF5CEB25EBC9684CE),
+	SPH_C64(0xF633E20CB7F76221), SPH_C64(0xA32CDF06AB8293E4),
+	SPH_C64(0x985A202CA5EE2CA4), SPH_C64(0xCF0B8447CC8A8FB1),
+	SPH_C64(0x9F765244979859A3), SPH_C64(0xA8D516B1A1240017),
+	SPH_C64(0x0BD7BA3EBB5DC726), SPH_C64(0xE54BCA55B86ADB39),
+	SPH_C64(0x1D7A3AFD6C478063), SPH_C64(0x519EC608E7669EDD),
+	SPH_C64(0x0E5715A2D149AA23), SPH_C64(0x177D4571848FF194),
+	SPH_C64(0xEEB55F3241014C22), SPH_C64(0x0F5E5CA13A6E2EC2),
+	SPH_C64(0x8029927B75F5C361), SPH_C64(0xAD139FABC3D6E436),
+	SPH_C64(0x0D5DF1A94CCF402F), SPH_C64(0x3E8BD948BEA5DFC8),
+	SPH_C64(0xA5A0D357BD3FF77E), SPH_C64(0xA2D12E251F74F645),
+	SPH_C64(0x66FD9E525E81A082), SPH_C64(0x2E0C90CE7F687A49),
+	SPH_C64(0xC2E8BCBEBA973BC5), SPH_C64(0x000001BCE509745F),
+	SPH_C64(0x423777BBE6DAB3D6), SPH_C64(0xD1661C7EAEF06EB5),
+	SPH_C64(0xA1781F354DAACFD8), SPH_C64(0x2D11284A2B16AFFC),
+	SPH_C64(0xF1FC4F67FA891D1F), SPH_C64(0x73ECC25DCB920ADA),
+	SPH_C64(0xAE610C22C2A12651), SPH_C64(0x96E0A810D356B78A),
+	SPH_C64(0x5A9A381F2FE7870F), SPH_C64(0xD5AD62EDE94E5530),
+	SPH_C64(0xD225E5E8368D1427), SPH_C64(0x65977B70C7AF4631),
+	SPH_C64(0x99F889B2DE39D74F), SPH_C64(0x233F30BF54E1D143),
+	SPH_C64(0x9A9675D3D9A63C97), SPH_C64(0x5470554FF334F9A8),
+	SPH_C64(0x166ACB744A4F5688), SPH_C64(0x70C74CAAB2E4AEAD),
+	SPH_C64(0xF0D091646F294D12), SPH_C64(0x57B82A89684031D1),
+	SPH_C64(0xEFD95A5A61BE0B6B), SPH_C64(0x2FBD12E969F2F29A),
+	SPH_C64(0x9BD37013FEFF9FE8), SPH_C64(0x3F9B0404D6085A06),
+	SPH_C64(0x4940C1F3166CFE15), SPH_C64(0x09542C4DCDF3DEFB),
+	SPH_C64(0xB4C5218385CD5CE3), SPH_C64(0xC935B7DC4462A641),
+	SPH_C64(0x3417F8A68ED3B63F), SPH_C64(0xB80959295B215B40),
+	SPH_C64(0xF99CDAEF3B8C8572), SPH_C64(0x018C0614F8FCB95D),
+	SPH_C64(0x1B14ACCD1A3ACDF3), SPH_C64(0x84D471F200BB732D),
+	SPH_C64(0xC1A3110E95E8DA16), SPH_C64(0x430A7220BF1A82B8),
+	SPH_C64(0xB77E090D39DF210E), SPH_C64(0x5EF4BD9F3CD05E9D),
+	SPH_C64(0x9D4FF6DA7E57A444), SPH_C64(0xDA1D60E183D4A5F8),
+	SPH_C64(0xB287C38417998E47), SPH_C64(0xFE3EDC121BB31886),
+	SPH_C64(0xC7FE3CCC980CCBEF), SPH_C64(0xE46FB590189BFD03),
+	SPH_C64(0x3732FD469A4C57DC), SPH_C64(0x7EF700A07CF1AD65),
+	SPH_C64(0x59C64468A31D8859), SPH_C64(0x762FB0B4D45B61F6),
+	SPH_C64(0x155BAED099047718), SPH_C64(0x68755E4C3D50BAA6),
+	SPH_C64(0xE9214E7F22D8B4DF), SPH_C64(0x2ADDBF532EAC95F4),
+	SPH_C64(0x32AE3909B4BD0109), SPH_C64(0x834DF537B08E3450),
+	SPH_C64(0xFA209DA84220728D), SPH_C64(0x9E691D9B9EFE23F7),
+	SPH_C64(0x0446D288C4AE8D7F), SPH_C64(0x7B4CC524E169785B),
+	SPH_C64(0x21D87F0135CA1385), SPH_C64(0xCEBB400F137B8AA5),
+	SPH_C64(0x272E2B66580796BE), SPH_C64(0x3612264125C2B0DE),
+	SPH_C64(0x057702BDAD1EFBB2), SPH_C64(0xD4BABB8EACF84BE9),
+	SPH_C64(0x91583139641BC67B), SPH_C64(0x8BDC2DE08036E024),
+	SPH_C64(0x603C8156F49F68ED), SPH_C64(0xF7D236F7DBEF5111),
+	SPH_C64(0x9727C4598AD21E80), SPH_C64(0xA08A0896670A5FD7),
+	SPH_C64(0xCB4A8F4309EBA9CB), SPH_C64(0x81AF564B0F7036A1),
+	SPH_C64(0xC0B99AA778199ABD), SPH_C64(0x959F1EC83FC8E952),
+	SPH_C64(0x8C505077794A81B9), SPH_C64(0x3ACAAF8F056338F0),
+	SPH_C64(0x07B43F50627A6778), SPH_C64(0x4A44AB49F5ECCC77),
+	SPH_C64(0x3BC3D6E4B679EE98), SPH_C64(0x9CC0D4D1CF14108C),
+	SPH_C64(0x4406C00B206BC8A0), SPH_C64(0x82A18854C8D72D89),
+	SPH_C64(0x67E366B35C3C432C), SPH_C64(0xB923DD61102B37F2),
+	SPH_C64(0x56AB2779D884271D), SPH_C64(0xBE83E1B0FF1525AF),
+	SPH_C64(0xFB7C65D4217E49A9), SPH_C64(0x6BDBE0E76D48E7D4),
+	SPH_C64(0x08DF828745D9179E), SPH_C64(0x22EA6A9ADD53BD34),
+	SPH_C64(0xE36E141C5622200A), SPH_C64(0x7F805D1B8CB750EE),
+	SPH_C64(0xAFE5C7A59F58E837), SPH_C64(0xE27F996A4FB1C23C),
+	SPH_C64(0xD3867DFB0775F0D0), SPH_C64(0xD0E673DE6E88891A),
+	SPH_C64(0x123AEB9EAFB86C25), SPH_C64(0x30F1D5D5C145B895),
+	SPH_C64(0xBB434A2DEE7269E7), SPH_C64(0x78CB67ECF931FA38),
+	SPH_C64(0xF33B0372323BBF9C), SPH_C64(0x52D66336FB279C74),
+	SPH_C64(0x505F33AC0AFB4EAA), SPH_C64(0xE8A5CD99A2CCE187),
+	SPH_C64(0x534974801E2D30BB), SPH_C64(0x8D2D5711D5876D90),
+	SPH_C64(0x1F1A412891BC038E), SPH_C64(0xD6E2E71D82E56648),
+	SPH_C64(0x74036C3A497732B7), SPH_C64(0x89B67ED96361F5AB),
+	SPH_C64(0xFFED95D8F1EA02A2), SPH_C64(0xE72B3BD61464D43D),
+	SPH_C64(0xA6300F170BDC4820), SPH_C64(0xEBC18760ED78A77A),
+};
+
+static const sph_u64 T2[256] = { /* lookup table; ROUND indexes it with bytes 2 and 5 of c */
+	SPH_C64(0xE6A6BE5A05A12138), SPH_C64(0xB5A122A5B4F87C98),
+	SPH_C64(0x563C6089140B6990), SPH_C64(0x4C46CB2E391F5DD5),
+	SPH_C64(0xD932ADDBC9B79434), SPH_C64(0x08EA70E42015AFF5),
+	SPH_C64(0xD765A6673E478CF1), SPH_C64(0xC4FB757EAB278D99),
+	SPH_C64(0xDF11C6862D6E0692), SPH_C64(0xDDEB84F10D7F3B16),
+	SPH_C64(0x6F2EF604A665EA04), SPH_C64(0x4A8E0F0FF0E0DFB3),
+	SPH_C64(0xA5EDEEF83DBCBA51), SPH_C64(0xFC4F0A2A0EA4371E),
+	SPH_C64(0xE83E1DA85CB38429), SPH_C64(0xDC8FF882BA1B1CE2),
+	SPH_C64(0xCD45505E8353E80D), SPH_C64(0x18D19A00D4DB0717),
+	SPH_C64(0x34A0CFEDA5F38101), SPH_C64(0x0BE77E518887CAF2),
+	SPH_C64(0x1E341438B3C45136), SPH_C64(0xE05797F49089CCF9),
+	SPH_C64(0xFFD23F9DF2591D14), SPH_C64(0x543DDA228595C5CD),
+	SPH_C64(0x661F81FD99052A33), SPH_C64(0x8736E641DB0F7B76),
+	SPH_C64(0x15227725418E5307), SPH_C64(0xE25F7F46162EB2FA),
+	SPH_C64(0x48A8B2126C13D9FE), SPH_C64(0xAFDC541792E76EEA),
+	SPH_C64(0x03D912BFC6D1898F), SPH_C64(0x31B1AAFA1B83F51B),
+	SPH_C64(0xF1AC2796E42AB7D9), SPH_C64(0x40A3A7D7FCD2EBAC),
+	SPH_C64(0x1056136D0AFBBCC5), SPH_C64(0x7889E1DD9A6D0C85),
+	SPH_C64(0xD33525782A7974AA), SPH_C64(0xA7E25D09078AC09B),
+	SPH_C64(0xBD4138B3EAC6EDD0), SPH_C64(0x920ABFBE71EB9E70),
+	SPH_C64(0xA2A5D0F54FC2625C), SPH_C64(0xC054E36B0B1290A3),
+	SPH_C64(0xF6DD59FF62FE932B), SPH_C64(0x3537354511A8AC7D),
+	SPH_C64(0xCA845E9172FADCD4), SPH_C64(0x84F82B60329D20DC),
+	SPH_C64(0x79C62CE1CD672F18), SPH_C64(0x8B09A2ADD124642C),
+	SPH_C64(0xD0C1E96A19D9E726), SPH_C64(0x5A786A9B4BA9500C),
+	SPH_C64(0x0E020336634C43F3), SPH_C64(0xC17B474AEB66D822),
+	SPH_C64(0x6A731AE3EC9BAAC2), SPH_C64(0x8226667AE0840258),
+	SPH_C64(0x67D4567691CAECA5), SPH_C64(0x1D94155C4875ADB5),
+	SPH_C64(0x6D00FD985B813FDF), SPH_C64(0x51286EFCB774CD06),
+	SPH_C64(0x5E8834471FA744AF), SPH_C64(0xF72CA0AEE761AE2E),
+	SPH_C64(0xBE40E4CDAEE8E09A), SPH_C64(0xE9970BBB5118F665),
+	SPH_C64(0x726E4BEB33DF1964), SPH_C64(0x703B000729199762),
+	SPH_C64(0x4631D816F5EF30A7), SPH_C64(0xB880B5B51504A6BE),
+	SPH_C64(0x641793C37ED84B6C), SPH_C64(0x7B21ED77F6E97D96),
+	SPH_C64(0x776306312EF96B73), SPH_C64(0xAE528948E86FF3F4),
+	SPH_C64(0x53DBD7F286A3F8F8), SPH_C64(0x16CADCE74CFC1063),
+	SPH_C64(0x005C19BDFA52C6DD), SPH_C64(0x68868F5D64D46AD3),
+	SPH_C64(0x3A9D512CCF1E186A), SPH_C64(0x367E62C2385660AE),
+	SPH_C64(0xE359E7EA77DCB1D7), SPH_C64(0x526C0773749ABE6E),
+	SPH_C64(0x735AE5F9D09F734B), SPH_C64(0x493FC7CC8A558BA8),
+	SPH_C64(0xB0B9C1533041AB45), SPH_C64(0x321958BA470A59BD),
+	SPH_C64(0x852DB00B5F46C393), SPH_C64(0x91209B2BD336B0E5),
+	SPH_C64(0x6E604F7D659EF19F), SPH_C64(0xB99A8AE2782CCB24),
+	SPH_C64(0xCCF52AB6C814C4C7), SPH_C64(0x4727D9AFBE11727B),
+	SPH_C64(0x7E950D0C0121B34D), SPH_C64(0x756F435670AD471F),
+	SPH_C64(0xF5ADD442615A6849), SPH_C64(0x4E87E09980B9957A),
+	SPH_C64(0x2ACFA1DF50AEE355), SPH_C64(0xD898263AFD2FD556),
+	SPH_C64(0xC8F4924DD80C8FD6), SPH_C64(0xCF99CA3D754A173A),
+	SPH_C64(0xFE477BACAF91BF3C), SPH_C64(0xED5371F6D690C12D),
+	SPH_C64(0x831A5C285E687094), SPH_C64(0xC5D3C90A3708A0A4),
+	SPH_C64(0x0F7F903717D06580), SPH_C64(0x19F9BB13B8FDF27F),
+	SPH_C64(0xB1BD6F1B4D502843), SPH_C64(0x1C761BA38FFF4012),
+	SPH_C64(0x0D1530C4E2E21F3B), SPH_C64(0x8943CE69A7372C8A),
+	SPH_C64(0xE5184E11FEB5CE66), SPH_C64(0x618BDB80BD736621),
+	SPH_C64(0x7D29BAD68B574D0B), SPH_C64(0x81BB613E25E6FE5B),
+	SPH_C64(0x071C9C10BC07913F), SPH_C64(0xC7BEEB7909AC2D97),
+	SPH_C64(0xC3E58D353BC5D757), SPH_C64(0xEB017892F38F61E8),
+	SPH_C64(0xD4EFFB9C9B1CC21A), SPH_C64(0x99727D26F494F7AB),
+	SPH_C64(0xA3E063A2956B3E03), SPH_C64(0x9D4A8B9A4AA09C30),
+	SPH_C64(0x3F6AB7D500090FB4), SPH_C64(0x9CC0F2A057268AC0),
+	SPH_C64(0x3DEE9D2DEDBF42D1), SPH_C64(0x330F49C87960A972),
+	SPH_C64(0xC6B2720287421B41), SPH_C64(0x0AC59EC07C00369C),
+	SPH_C64(0xEF4EAC49CB353425), SPH_C64(0xF450244EEF0129D8),
+	SPH_C64(0x8ACC46E5CAF4DEB6), SPH_C64(0x2FFEAB63989263F7),
+	SPH_C64(0x8F7CB9FE5D7A4578), SPH_C64(0x5BD8F7644E634635),
+	SPH_C64(0x427A7315BF2DC900), SPH_C64(0x17D0C4AA2125261C),
+	SPH_C64(0x3992486C93518E50), SPH_C64(0xB4CBFEE0A2D7D4C3),
+	SPH_C64(0x7C75D6202C5DDD8D), SPH_C64(0xDBC295D8E35B6C61),
+	SPH_C64(0x60B369D302032B19), SPH_C64(0xCE42685FDCE44132),
+	SPH_C64(0x06F3DDB9DDF65610), SPH_C64(0x8EA4D21DB5E148F0),
+	SPH_C64(0x20B0FCE62FCD496F), SPH_C64(0x2C1B912358B0EE31),
+	SPH_C64(0xB28317B818F5A308), SPH_C64(0xA89C1E189CA6D2CF),
+	SPH_C64(0x0C6B18576AAADBC8), SPH_C64(0xB65DEAA91299FAE3),
+	SPH_C64(0xFB2B794B7F1027E7), SPH_C64(0x04E4317F443B5BEB),
+	SPH_C64(0x4B852D325939D0A6), SPH_C64(0xD5AE6BEEFB207FFC),
+	SPH_C64(0x309682B281C7D374), SPH_C64(0xBAE309A194C3B475),
+	SPH_C64(0x8CC3F97B13B49F05), SPH_C64(0x98A9422FF8293967),
+	SPH_C64(0x244B16B01076FF7C), SPH_C64(0xF8BF571C663D67EE),
+	SPH_C64(0x1F0D6758EEE30DA1), SPH_C64(0xC9B611D97ADEB9B7),
+	SPH_C64(0xB7AFD5887B6C57A2), SPH_C64(0x6290AE846B984FE1),
+	SPH_C64(0x94DF4CDEACC1A5FD), SPH_C64(0x058A5BD1C5483AFF),
+	SPH_C64(0x63166CC142BA3C37), SPH_C64(0x8DB8526EB2F76F40),
+	SPH_C64(0xE10880036F0D6D4E), SPH_C64(0x9E0523C9971D311D),
+	SPH_C64(0x45EC2824CC7CD691), SPH_C64(0x575B8359E62382C9),
+	SPH_C64(0xFA9E400DC4889995), SPH_C64(0xD1823ECB45721568),
+	SPH_C64(0xDAFD983B8206082F), SPH_C64(0xAA7D29082386A8CB),
+	SPH_C64(0x269FCD4403B87588), SPH_C64(0x1B91F5F728BDD1E0),
+	SPH_C64(0xE4669F39040201F6), SPH_C64(0x7A1D7C218CF04ADE),
+	SPH_C64(0x65623C29D79CE5CE), SPH_C64(0x2368449096C00BB1),
+	SPH_C64(0xAB9BF1879DA503BA), SPH_C64(0xBC23ECB1A458058E),
+	SPH_C64(0x9A58DF01BB401ECC), SPH_C64(0xA070E868A85F143D),
+	SPH_C64(0x4FF188307DF2239E), SPH_C64(0x14D565B41A641183),
+	SPH_C64(0xEE13337452701602), SPH_C64(0x950E3DCF3F285E09),
+	SPH_C64(0x59930254B9C80953), SPH_C64(0x3BF299408930DA6D),
+	SPH_C64(0xA955943F53691387), SPH_C64(0xA15EDECAA9CB8784),
+	SPH_C64(0x29142127352BE9A0), SPH_C64(0x76F0371FFF4E7AFB),
+	SPH_C64(0x0239F450274F2228), SPH_C64(0xBB073AF01D5E868B),
+	SPH_C64(0xBFC80571C10E96C1), SPH_C64(0xD267088568222E23),
+	SPH_C64(0x9671A3D48E80B5B0), SPH_C64(0x55B5D38AE193BB81),
+	SPH_C64(0x693AE2D0A18B04B8), SPH_C64(0x5C48B4ECADD5335F),
+	SPH_C64(0xFD743B194916A1CA), SPH_C64(0x2577018134BE98C4),
+	SPH_C64(0xE77987E83C54A4AD), SPH_C64(0x28E11014DA33E1B9),
+	SPH_C64(0x270CC59E226AA213), SPH_C64(0x71495F756D1A5F60),
+	SPH_C64(0x9BE853FB60AFEF77), SPH_C64(0xADC786A7F7443DBF),
+	SPH_C64(0x0904456173B29A82), SPH_C64(0x58BC7A66C232BD5E),
+	SPH_C64(0xF306558C673AC8B2), SPH_C64(0x41F639C6B6C9772A),
+	SPH_C64(0x216DEFE99FDA35DA), SPH_C64(0x11640CC71C7BE615),
+	SPH_C64(0x93C43694565C5527), SPH_C64(0xEA038E6246777839),
+	SPH_C64(0xF9ABF3CE5A3E2469), SPH_C64(0x741E768D0FD312D2),
+	SPH_C64(0x0144B883CED652C6), SPH_C64(0xC20B5A5BA33F8552),
+	SPH_C64(0x1AE69633C3435A9D), SPH_C64(0x97A28CA4088CFDEC),
+	SPH_C64(0x8824A43C1E96F420), SPH_C64(0x37612FA66EEEA746),
+	SPH_C64(0x6B4CB165F9CF0E5A), SPH_C64(0x43AA1C06A0ABFB4A),
+	SPH_C64(0x7F4DC26FF162796B), SPH_C64(0x6CBACC8E54ED9B0F),
+	SPH_C64(0xA6B7FFEFD2BB253E), SPH_C64(0x2E25BC95B0A29D4F),
+	SPH_C64(0x86D6A58BDEF1388C), SPH_C64(0xDED74AC576B6F054),
+	SPH_C64(0x8030BDBC2B45805D), SPH_C64(0x3C81AF70E94D9289),
+	SPH_C64(0x3EFF6DDA9E3100DB), SPH_C64(0xB38DC39FDFCC8847),
+	SPH_C64(0x123885528D17B87E), SPH_C64(0xF2DA0ED240B1B642),
+	SPH_C64(0x44CEFADCD54BF9A9), SPH_C64(0x1312200E433C7EE6),
+	SPH_C64(0x9FFCC84F3A78C748), SPH_C64(0xF0CD1F72248576BB),
+	SPH_C64(0xEC6974053638CFE4), SPH_C64(0x2BA7B67C0CEC4E4C),
+	SPH_C64(0xAC2F4DF3E5CE32ED), SPH_C64(0xCB33D14326EA4C11),
+	SPH_C64(0xA4E9044CC77E58BC), SPH_C64(0x5F513293D934FCEF),
+	SPH_C64(0x5DC9645506E55444), SPH_C64(0x50DE418F317DE40A),
+	SPH_C64(0x388CB31A69DDE259), SPH_C64(0x2DB4A83455820A86),
+	SPH_C64(0x9010A91E84711AE9), SPH_C64(0x4DF7F0B7B1498371),
+	SPH_C64(0xD62A2EABC0977179), SPH_C64(0x22FAC097AA8D5C0E),
+};
+
+static const sph_u64 T3[256] = { /* lookup table; ROUND indexes it with bytes 4 and 3 of c */
+	SPH_C64(0xF49FCC2FF1DAF39B), SPH_C64(0x487FD5C66FF29281),
+	SPH_C64(0xE8A30667FCDCA83F), SPH_C64(0x2C9B4BE3D2FCCE63),
+	SPH_C64(0xDA3FF74B93FBBBC2), SPH_C64(0x2FA165D2FE70BA66),
+	SPH_C64(0xA103E279970E93D4), SPH_C64(0xBECDEC77B0E45E71),
+	SPH_C64(0xCFB41E723985E497), SPH_C64(0xB70AAA025EF75017),
+	SPH_C64(0xD42309F03840B8E0), SPH_C64(0x8EFC1AD035898579),
+	SPH_C64(0x96C6920BE2B2ABC5), SPH_C64(0x66AF4163375A9172),
+	SPH_C64(0x2174ABDCCA7127FB), SPH_C64(0xB33CCEA64A72FF41),
+	SPH_C64(0xF04A4933083066A5), SPH_C64(0x8D970ACDD7289AF5),
+	SPH_C64(0x8F96E8E031C8C25E), SPH_C64(0xF3FEC02276875D47),
+	SPH_C64(0xEC7BF310056190DD), SPH_C64(0xF5ADB0AEBB0F1491),
+	SPH_C64(0x9B50F8850FD58892), SPH_C64(0x4975488358B74DE8),
+	SPH_C64(0xA3354FF691531C61), SPH_C64(0x0702BBE481D2C6EE),
+	SPH_C64(0x89FB24057DEDED98), SPH_C64(0xAC3075138596E902),
+	SPH_C64(0x1D2D3580172772ED), SPH_C64(0xEB738FC28E6BC30D),
+	SPH_C64(0x5854EF8F63044326), SPH_C64(0x9E5C52325ADD3BBE),
+	SPH_C64(0x90AA53CF325C4623), SPH_C64(0xC1D24D51349DD067),
+	SPH_C64(0x2051CFEEA69EA624), SPH_C64(0x13220F0A862E7E4F),
+	SPH_C64(0xCE39399404E04864), SPH_C64(0xD9C42CA47086FCB7),
+	SPH_C64(0x685AD2238A03E7CC), SPH_C64(0x066484B2AB2FF1DB),
+	SPH_C64(0xFE9D5D70EFBF79EC), SPH_C64(0x5B13B9DD9C481854),
+	SPH_C64(0x15F0D475ED1509AD), SPH_C64(0x0BEBCD060EC79851),
+	SPH_C64(0xD58C6791183AB7F8), SPH_C64(0xD1187C5052F3EEE4),
+	SPH_C64(0xC95D1192E54E82FF), SPH_C64(0x86EEA14CB9AC6CA2),
+	SPH_C64(0x3485BEB153677D5D), SPH_C64(0xDD191D781F8C492A),
+	SPH_C64(0xF60866BAA784EBF9), SPH_C64(0x518F643BA2D08C74),
+	SPH_C64(0x8852E956E1087C22), SPH_C64(0xA768CB8DC410AE8D),
+	SPH_C64(0x38047726BFEC8E1A), SPH_C64(0xA67738B4CD3B45AA),
+	SPH_C64(0xAD16691CEC0DDE19), SPH_C64(0xC6D4319380462E07),
+	SPH_C64(0xC5A5876D0BA61938), SPH_C64(0x16B9FA1FA58FD840),
+	SPH_C64(0x188AB1173CA74F18), SPH_C64(0xABDA2F98C99C021F),
+	SPH_C64(0x3E0580AB134AE816), SPH_C64(0x5F3B05B773645ABB),
+	SPH_C64(0x2501A2BE5575F2F6), SPH_C64(0x1B2F74004E7E8BA9),
+	SPH_C64(0x1CD7580371E8D953), SPH_C64(0x7F6ED89562764E30),
+	SPH_C64(0xB15926FF596F003D), SPH_C64(0x9F65293DA8C5D6B9),
+	SPH_C64(0x6ECEF04DD690F84C), SPH_C64(0x4782275FFF33AF88),
+	SPH_C64(0xE41433083F820801), SPH_C64(0xFD0DFE409A1AF9B5),
+	SPH_C64(0x4325A3342CDB396B), SPH_C64(0x8AE77E62B301B252),
+	SPH_C64(0xC36F9E9F6655615A), SPH_C64(0x85455A2D92D32C09),
+	SPH_C64(0xF2C7DEA949477485), SPH_C64(0x63CFB4C133A39EBA),
+	SPH_C64(0x83B040CC6EBC5462), SPH_C64(0x3B9454C8FDB326B0),
+	SPH_C64(0x56F56A9E87FFD78C), SPH_C64(0x2DC2940D99F42BC6),
+	SPH_C64(0x98F7DF096B096E2D), SPH_C64(0x19A6E01E3AD852BF),
+	SPH_C64(0x42A99CCBDBD4B40B), SPH_C64(0xA59998AF45E9C559),
+	SPH_C64(0x366295E807D93186), SPH_C64(0x6B48181BFAA1F773),
+	SPH_C64(0x1FEC57E2157A0A1D), SPH_C64(0x4667446AF6201AD5),
+	SPH_C64(0xE615EBCACFB0F075), SPH_C64(0xB8F31F4F68290778),
+	SPH_C64(0x22713ED6CE22D11E), SPH_C64(0x3057C1A72EC3C93B),
+	SPH_C64(0xCB46ACC37C3F1F2F), SPH_C64(0xDBB893FD02AAF50E),
+	SPH_C64(0x331FD92E600B9FCF), SPH_C64(0xA498F96148EA3AD6),
+	SPH_C64(0xA8D8426E8B6A83EA), SPH_C64(0xA089B274B7735CDC),
+	SPH_C64(0x87F6B3731E524A11), SPH_C64(0x118808E5CBC96749),
+	SPH_C64(0x9906E4C7B19BD394), SPH_C64(0xAFED7F7E9B24A20C),
+	SPH_C64(0x6509EADEEB3644A7), SPH_C64(0x6C1EF1D3E8EF0EDE),
+	SPH_C64(0xB9C97D43E9798FB4), SPH_C64(0xA2F2D784740C28A3),
+	SPH_C64(0x7B8496476197566F), SPH_C64(0x7A5BE3E6B65F069D),
+	SPH_C64(0xF96330ED78BE6F10), SPH_C64(0xEEE60DE77A076A15),
+	SPH_C64(0x2B4BEE4AA08B9BD0), SPH_C64(0x6A56A63EC7B8894E),
+	SPH_C64(0x02121359BA34FEF4), SPH_C64(0x4CBF99F8283703FC),
+	SPH_C64(0x398071350CAF30C8), SPH_C64(0xD0A77A89F017687A),
+	SPH_C64(0xF1C1A9EB9E423569), SPH_C64(0x8C7976282DEE8199),
+	SPH_C64(0x5D1737A5DD1F7ABD), SPH_C64(0x4F53433C09A9FA80),
+	SPH_C64(0xFA8B0C53DF7CA1D9), SPH_C64(0x3FD9DCBC886CCB77),
+	SPH_C64(0xC040917CA91B4720), SPH_C64(0x7DD00142F9D1DCDF),
+	SPH_C64(0x8476FC1D4F387B58), SPH_C64(0x23F8E7C5F3316503),
+	SPH_C64(0x032A2244E7E37339), SPH_C64(0x5C87A5D750F5A74B),
+	SPH_C64(0x082B4CC43698992E), SPH_C64(0xDF917BECB858F63C),
+	SPH_C64(0x3270B8FC5BF86DDA), SPH_C64(0x10AE72BB29B5DD76),
+	SPH_C64(0x576AC94E7700362B), SPH_C64(0x1AD112DAC61EFB8F),
+	SPH_C64(0x691BC30EC5FAA427), SPH_C64(0xFF246311CC327143),
+	SPH_C64(0x3142368E30E53206), SPH_C64(0x71380E31E02CA396),
+	SPH_C64(0x958D5C960AAD76F1), SPH_C64(0xF8D6F430C16DA536),
+	SPH_C64(0xC8FFD13F1BE7E1D2), SPH_C64(0x7578AE66004DDBE1),
+	SPH_C64(0x05833F01067BE646), SPH_C64(0xBB34B5AD3BFE586D),
+	SPH_C64(0x095F34C9A12B97F0), SPH_C64(0x247AB64525D60CA8),
+	SPH_C64(0xDCDBC6F3017477D1), SPH_C64(0x4A2E14D4DECAD24D),
+	SPH_C64(0xBDB5E6D9BE0A1EEB), SPH_C64(0x2A7E70F7794301AB),
+	SPH_C64(0xDEF42D8A270540FD), SPH_C64(0x01078EC0A34C22C1),
+	SPH_C64(0xE5DE511AF4C16387), SPH_C64(0x7EBB3A52BD9A330A),
+	SPH_C64(0x77697857AA7D6435), SPH_C64(0x004E831603AE4C32),
+	SPH_C64(0xE7A21020AD78E312), SPH_C64(0x9D41A70C6AB420F2),
+	SPH_C64(0x28E06C18EA1141E6), SPH_C64(0xD2B28CBD984F6B28),
+	SPH_C64(0x26B75F6C446E9D83), SPH_C64(0xBA47568C4D418D7F),
+	SPH_C64(0xD80BADBFE6183D8E), SPH_C64(0x0E206D7F5F166044),
+	SPH_C64(0xE258A43911CBCA3E), SPH_C64(0x723A1746B21DC0BC),
+	SPH_C64(0xC7CAA854F5D7CDD3), SPH_C64(0x7CAC32883D261D9C),
+	SPH_C64(0x7690C26423BA942C), SPH_C64(0x17E55524478042B8),
+	SPH_C64(0xE0BE477656A2389F), SP​H_C64(0x4D289B5E67AB2DA0),
+	SPH_C64(0x44862B9C8FBBFD31), SPH_C64(0xB47CC8049D141365),
+	SPH_C64(0x822C1B362B91C793), SPH_C64(0x4EB14655FB13DFD8),
+	SPH_C64(0x1ECBBA0714E2A97B), SPH_C64(0x6143459D5CDE5F14),
+	SPH_C64(0x53A8FBF1D5F0AC89), SPH_C64(0x97EA04D81C5E5B00),
+	SPH_C64(0x622181A8D4FDB3F3), SPH_C64(0xE9BCD341572A1208),
+	SPH_C64(0x1411258643CCE58A), SPH_C64(0x9144C5FEA4C6E0A4),
+	SPH_C64(0x0D33D06565CF620F), SPH_C64(0x54A48D489F219CA1),
+	SPH_C64(0xC43E5EAC6D63C821), SPH_C64(0xA9728B3A72770DAF),
+	SPH_C64(0xD7934E7B20DF87EF), SPH_C64(0xE35503B61A3E86E5),
+	SPH_C64(0xCAE321FBC819D504), SPH_C64(0x129A50B3AC60BFA6),
+	SPH_C64(0xCD5E68EA7E9FB6C3), SPH_C64(0xB01C90199483B1C7),
+	SPH_C64(0x3DE93CD5C295376C), SPH_C64(0xAED52EDF2AB9AD13),
+	SPH_C64(0x2E60F512C0A07884), SPH_C64(0xBC3D86A3E36210C9),
+	SPH_C64(0x35269D9B163951CE), SPH_C64(0x0C7D6E2AD0CDB5FA),
+	SPH_C64(0x59E86297D87F5733), SPH_C64(0x298EF221898DB0E7),
+	SPH_C64(0x55000029D1A5AA7E), SPH_C64(0x8BC08AE1B5061B45),
+	SPH_C64(0xC2C31C2B6C92703A), SPH_C64(0x94CC596BAF25EF42),
+	SPH_C64(0x0A1D73DB22540456), SPH_C64(0x04B6A0F9D9C4179A),
+	SPH_C64(0xEFFDAFA2AE3D3C60), SPH_C64(0xF7C8075BB49496C4),
+	SPH_C64(0x9CC5C7141D1CD4E3), SPH_C64(0x78BD1638218E5534),
+	SPH_C64(0xB2F11568F850246A), SPH_C64(0xEDFABCFA9502BC29),
+	SPH_C64(0x796CE5F2DA23051B), SPH_C64(0xAAE128B0DC93537C),
+	SPH_C64(0x3A493DA0EE4B29AE), SPH_C64(0xB5DF6B2C416895D7),
+	SPH_C64(0xFCABBD25122D7F37), SPH_C64(0x70810B58105DC4B1),
+	SPH_C64(0xE10FDD37F7882A90), SPH_C64(0x524DCAB5518A3F5C),
+	SPH_C64(0x3C9E85878451255B), SPH_C64(0x4029828119BD34E2),
+	SPH_C64(0x74A05B6F5D3CECCB), SPH_C64(0xB610021542E13ECA),
+	SPH_C64(0x0FF979D12F59E2AC), SPH_C64(0x6037DA27E4F9CC50),
+	SPH_C64(0x5E92975A0DF1847D), SPH_C64(0xD66DE190D3E623FE),
+	SPH_C64(0x5032D6B87B568048), SPH_C64(0x9A36B7CE8235216E),
+	SPH_C64(0x80272A7A24F64B4A), SPH_C64(0x93EFED8B8C6916F7),
+	SPH_C64(0x37DDBFF44CCE1555), SPH_C64(0x4B95DB5D4B99BD25),
+	SPH_C64(0x92D3FDA169812FC0), SPH_C64(0xFB1A4A9A90660BB6),
+	SPH_C64(0x730C196946A4B9B2), SPH_C64(0x81E289AA7F49DA68),
+	SPH_C64(0x64669A0F83B1A05F), SPH_C64(0x27B3FF7D9644F48B),
+	SPH_C64(0xCC6B615C8DB675B3), SPH_C64(0x674F20B9BCEBBE95),
+	SPH_C64(0x6F31238275655982), SPH_C64(0x5AE488713E45CF05),
+	SPH_C64(0xBF619F9954C21157), SPH_C64(0xEABAC46040A8EAE9),
+	SPH_C64(0x454C6FE9F2C0C1CD), SPH_C64(0x419CF6496412691C),
+	SPH_C64(0xD3DC3BEF265B0F70), SPH_C64(0x6D0E60F5C3578A9E),
+};
+
+static const sph_u64 T4[256] = { /* lookup table; ROUND indexes it with bytes 6 and 1 of c */
+	SPH_C64(0x5B0E608526323C55), SPH_C64(0x1A46C1A9FA1B59F5),
+	SPH_C64(0xA9E245A17C4C8FFA), SPH_C64(0x65CA5159DB2955D7),
+	SPH_C64(0x05DB0A76CE35AFC2), SPH_C64(0x81EAC77EA9113D45),
+	SPH_C64(0x528EF88AB6AC0A0D), SPH_C64(0xA09EA253597BE3FF),
+	SPH_C64(0x430DDFB3AC48CD56), SPH_C64(0xC4B3A67AF45CE46F),
+	SPH_C64(0x4ECECFD8FBE2D05E), SPH_C64(0x3EF56F10B39935F0),
+	SPH_C64(0x0B22D6829CD619C6), SPH_C64(0x17FD460A74DF2069),
+	SPH_C64(0x6CF8CC8E8510ED40), SPH_C64(0xD6C824BF3A6ECAA7),
+	SPH_C64(0x61243D581A817049), SPH_C64(0x048BACB6BBC163A2),
+	SPH_C64(0xD9A38AC27D44CC32), SPH_C64(0x7FDDFF5BAAF410AB),
+	SPH_C64(0xAD6D495AA804824B), SPH_C64(0xE1A6A74F2D8C9F94),
+	SPH_C64(0xD4F7851235DEE8E3), SPH_C64(0xFD4B7F886540D893),
+	SPH_C64(0x247C20042AA4BFDA), SPH_C64(0x096EA1C517D1327C),
+	SPH_C64(0xD56966B4361A6685), SPH_C64(0x277DA5C31221057D),
+	SPH_C64(0x94D59893A43ACFF7), SPH_C64(0x64F0C51CCDC02281),
+	SPH_C64(0x3D33BCC4FF6189DB), SPH_C64(0xE005CB184CE66AF1),
+	SPH_C64(0xFF5CCD1D1DB99BEA), SPH_C64(0xB0B854A7FE42980F),
+	SPH_C64(0x7BD46A6A718D4B9F), SPH_C64(0xD10FA8CC22A5FD8C),
+	SPH_C64(0xD31484952BE4BD31), SPH_C64(0xC7FA975FCB243847),
+	SPH_C64(0x4886ED1E5846C407), SPH_C64(0x28CDDB791EB70B04),
+	SPH_C64(0xC2B00BE2F573417F), SPH_C64(0x5C9590452180F877),
+	SPH_C64(0x7A6BDDFFF370EB00), SPH_C64(0xCE509E38D6D9D6A4),
+	SPH_C64(0xEBEB0F00647FA702), SPH_C64(0x1DCC06CF76606F06),
+	SPH_C64(0xE4D9F28BA286FF0A), SPH_C64(0xD85A305DC918C262),
+	SPH_C64(0x475B1D8732225F54), SPH_C64(0x2D4FB51668CCB5FE),
+	SPH_C64(0xA679B9D9D72BBA20), SPH_C64(0x53841C0D912D43A5),
+	SPH_C64(0x3B7EAA48BF12A4E8), SPH_C64(0x781E0E47F22F1DDF),
+	SPH_C64(0xEFF20CE60AB50973), SPH_C64(0x20D261D19DFFB742),
+	SPH_C64(0x16A12B03062A2E39), SPH_C64(0x1960EB2239650495),
+	SPH_C64(0x251C16FED50EB8B8), SPH_C64(0x9AC0C330F826016E),
+	SPH_C64(0xED152665953E7671), SPH_C64(0x02D63194A6369570),
+	SPH_C64(0x5074F08394B1C987), SPH_C64(0x70BA598C90B25CE1),
+	SPH_C64(0x794A15810B9742F6), SPH_C64(0x0D5925E9FCAF8C6C),
+	SPH_C64(0x3067716CD868744E), SPH_C64(0x910AB077E8D7731B),
+	SPH_C64(0x6A61BBDB5AC42F61), SPH_C64(0x93513EFBF0851567),
+	SPH_C64(0xF494724B9E83E9D5), SPH_C64(0xE887E1985C09648D),
+	SPH_C64(0x34B1D3C675370CFD), SPH_C64(0xDC35E433BC0D255D),
+	SPH_C64(0xD0AAB84234131BE0), SPH_C64(0x08042A50B48B7EAF),
+	SPH_C64(0x9997C4EE44A3AB35), SPH_C64(0x829A7B49201799D0),
+	SPH_C64(0x263B8307B7C54441), SPH_C64(0x752F95F4FD6A6CA6),
+	SPH_C64(0x927217402C08C6E5), SPH_C64(0x2A8AB754A795D9EE),
+	SPH_C64(0xA442F7552F72943D), SPH_C64(0x2C31334E19781208),
+	SPH_C64(0x4FA98D7CEAEE6291), SPH_C64(0x55C3862F665DB309),
+	SPH_C64(0xBD0610175D53B1F3), SPH_C64(0x46FE6CB840413F27),
+	SPH_C64(0x3FE03792DF0CFA59), SPH_C64(0xCFE700372EB85E8F),
+	SPH_C64(0xA7BE29E7ADBCE118), SPH_C64(0xE544EE5CDE8431DD),
+	SPH_C64(0x8A781B1B41F1873E), SPH_C64(0xA5C94C78A0D2F0E7),
+	SPH_C64(0x39412E2877B60728), SPH_C64(0xA1265EF3AFC9A62C),
+	SPH_C64(0xBCC2770C6A2506C5), SPH_C64(0x3AB66DD5DCE1CE12),
+	SPH_C64(0xE65499D04A675B37), SPH_C64(0x7D8F523481BFD216),
+	SPH_C64(0x0F6F64FCEC15F389), SPH_C64(0x74EFBE618B5B13C8),
+	SPH_C64(0xACDC82B714273E1D), SPH_C64(0xDD40BFE003199D17),
+	SPH_C64(0x37E99257E7E061F8), SPH_C64(0xFA52626904775AAA),
+	SPH_C64(0x8BBBF63A463D56F9), SPH_C64(0xF0013F1543A26E64),
+	SPH_C64(0xA8307E9F879EC898), SPH_C64(0xCC4C27A4150177CC),
+	SPH_C64(0x1B432F2CCA1D3348), SPH_C64(0xDE1D1F8F9F6FA013),
+	SPH_C64(0x606602A047A7DDD6), SPH_C64(0xD237AB64CC1CB2C7),
+	SPH_C64(0x9B938E7225FCD1D3), SPH_C64(0xEC4E03708E0FF476),
+	SPH_C64(0xFEB2FBDA3D03C12D), SPH_C64(0xAE0BCED2EE43889A),
+	SPH_C64(0x22CB8923EBFB4F43), SPH_C64(0x69360D013CF7396D),
+	SPH_C64(0x855E3602D2D4E022), SPH_C64(0x073805BAD01F784C),
+	SPH_C64(0x33E17A133852F546), SPH_C64(0xDF4874058AC7B638),
+	SPH_C64(0xBA92B29C678AA14A), SPH_C64(0x0CE89FC76CFAADCD),
+	SPH_C64(0x5F9D4E0908339E34), SPH_C64(0xF1AFE9291F5923B9),
+	SPH_C64(0x6E3480F60F4A265F), SPH_C64(0xEEBF3A2AB29B841C),
+	SPH_C64(0xE21938A88F91B4AD), SPH_C64(0x57DFEFF845C6D3C3),
+	SPH_C64(0x2F006B0BF62CAAF2), SPH_C64(0x62F479EF6F75EE78),
+	SPH_C64(0x11A55AD41C8916A9), SPH_C64(0xF229D29084FED453),
+	SPH_C64(0x42F1C27B16B000E6), SPH_C64(0x2B1F76749823C074),
+	SPH_C64(0x4B76ECA3C2745360), SPH_C64(0x8C98F463B91691BD),
+	SPH_C64(0x14BCC93CF1ADE66A), SPH_C64(0x8885213E6D458397),
+	SPH_C64(0x8E177DF0274D4711), SPH_C64(0xB49B73B5503F2951),
+	SPH_C64(0x10168168C3F96B6B), SPH_C64(0x0E3D963B63CAB0AE),
+	SPH_C64(0x8DFC4B5655A1DB14), SPH_C64(0xF789F1356E14DE5C),
+	SPH_C64(0x683E68AF4E51DAC1), SPH_C64(0xC9A84F9D8D4B0FD9),
+	SPH_C64(0x3691E03F52A0F9D1), SPH_C64(0x5ED86E46E1878E80),
+	SPH_C64(0x3C711A0E99D07150), SPH_C64(0x5A0865B20C4E9310),
+	SPH_C64(0x56FBFC1FE4F0682E), SPH_C64(0xEA8D5DE3105EDF9B),
+	SPH_C64(0x71ABFDB12379187A), SPH_C64(0x2EB99DE1BEE77B9C),
+	SPH_C64(0x21ECC0EA33CF4523), SPH_C64(0x59A4D7521805C7A1),
+	SPH_C64(0x3896F5EB56AE7C72), SPH_C64(0xAA638F3DB18F75DC),
+	SPH_C64(0x9F39358DABE9808E), SPH_C64(0xB7DEFA91C00B72AC),
+	SPH_C64(0x6B5541FD62492D92), SPH_C64(0x6DC6DEE8F92E4D5B),
+	SPH_C64(0x353F57ABC4BEEA7E), SPH_C64(0x735769D6DA5690CE),
+	SPH_C64(0x0A234AA642391484), SPH_C64(0xF6F9508028F80D9D),
+	SPH_C64(0xB8E319A27AB3F215), SPH_C64(0x31AD9C1151341A4D),
+	SPH_C64(0x773C22A57BEF5805), SPH_C64(0x45C7561A07968633),
+	SPH_C64(0xF913DA9E249DBE36), SPH_C64(0xDA652D9B78A64C68),
+	SPH_C64(0x4C27A97F3BC334EF), SPH_C64(0x76621220E66B17F4),
+	SPH_C64(0x967743899ACD7D0B), SPH_C64(0xF3EE5BCAE0ED6782),
+	SPH_C64(0x409F753600C879FC), SPH_C64(0x06D09A39B5926DB6),
+	SPH_C64(0x6F83AEB0317AC588), SPH_C64(0x01E6CA4A86381F21),
+	SPH_C64(0x66FF3462D19F3025), SPH_C64(0x72207C24DDFD3BFB),
+	SPH_C64(0x4AF6B6D3E2ECE2EB), SPH_C64(0x9C994DBEC7EA08DE),
+	SPH_C64(0x49ACE597B09A8BC4), SPH_C64(0xB38C4766CF0797BA),
+	SPH_C64(0x131B9373C57C2A75), SPH_C64(0xB1822CCE61931E58),
+	SPH_C64(0x9D7555B909BA1C0C), SPH_C64(0x127FAFDD937D11D2),
+	SPH_C64(0x29DA3BADC66D92E4), SPH_C64(0xA2C1D57154C2ECBC),
+	SPH_C64(0x58C5134D82F6FE24), SPH_C64(0x1C3AE3515B62274F),
+	SPH_C64(0xE907C82E01CB8126), SPH_C64(0xF8ED091913E37FCB),
+	SPH_C64(0x3249D8F9C80046C9), SPH_C64(0x80CF9BEDE388FB63),
+	SPH_C64(0x1881539A116CF19E), SPH_C64(0x5103F3F76BD52457),
+	SPH_C64(0x15B7E6F5AE47F7A8), SPH_C64(0xDBD7C6DED47E9CCF),
+	SPH_C64(0x44E55C410228BB1A), SPH_C64(0xB647D4255EDB4E99),
+	SPH_C64(0x5D11882BB8AAFC30), SPH_C64(0xF5098BBB29D3212A),
+	SPH_C64(0x8FB5EA14E90296B3), SPH_C64(0x677B942157DD025A),
+	SPH_C64(0xFB58E7C0A390ACB5), SPH_C64(0x89D3674C83BD4A01),
+	SPH_C64(0x9E2DA4DF4BF3B93B), SPH_C64(0xFCC41E328CAB4829),
+	SPH_C64(0x03F38C96BA582C52), SPH_C64(0xCAD1BDBD7FD85DB2),
+	SPH_C64(0xBBB442C16082AE83), SPH_C64(0xB95FE86BA5DA9AB0),
+	SPH_C64(0xB22E04673771A93F), SPH_C64(0x845358C9493152D8),
+	SPH_C64(0xBE2A488697B4541E), SPH_C64(0x95A2DC2DD38E6966),
+	SPH_C64(0xC02C11AC923C852B), SPH_C64(0x2388B1990DF2A87B),
+	SPH_C64(0x7C8008FA1B4F37BE), SPH_C64(0x1F70D0C84D54E503),
+	SPH_C64(0x5490ADEC7ECE57D4), SPH_C64(0x002B3C27D9063A3A),
+	SPH_C64(0x7EAEA3848030A2BF), SPH_C64(0xC602326DED2003C0),
+	SPH_C64(0x83A7287D69A94086), SPH_C64(0xC57A5FCB30F57A8A),
+	SPH_C64(0xB56844E479EBE779), SPH_C64(0xA373B40F05DCBCE9),
+	SPH_C64(0xD71A786E88570EE2), SPH_C64(0x879CBACDBDE8F6A0),
+	SPH_C64(0x976AD1BCC164A32F), SPH_C64(0xAB21E25E9666D78B),
+	SPH_C64(0x901063AAE5E5C33C), SPH_C64(0x9818B34448698D90),
+	SPH_C64(0xE36487AE3E1E8ABB), SPH_C64(0xAFBDF931893BDCB4),
+	SPH_C64(0x6345A0DC5FBBD519), SPH_C64(0x8628FE269B9465CA),
+	SPH_C64(0x1E5D01603F9C51EC), SPH_C64(0x4DE44006A15049B7),
+	SPH_C64(0xBF6C70E5F776CBB1), SPH_C64(0x411218F2EF552BED),
+	SPH_C64(0xCB0C0708705A36A3), SPH_C64(0xE74D14754F986044),
+	SPH_C64(0xCD56D9430EA8280E), SPH_C64(0xC12591D7535F5065),
+	SPH_C64(0xC83223F1720AEF96), SPH_C64(0xC3A0396F7363A51F),
+};
+
+#define PASS(s0, s1, s2, scale)   do { /* eight ROUNDs, one per word X0..X7, rotating the state roles */ \
+		ROUND(s0, s1, s2, X0, scale); \
+		ROUND(s1, s2, s0, X1, scale); \
+		ROUND(s2, s0, s1, X2, scale); \
+		ROUND(s0, s1, s2, X3, scale); \
+		ROUND(s1, s2, s0, X4, scale); \
+		ROUND(s2, s0, s1, X5, scale); \
+		ROUND(s0, s1, s2, X6, scale); \
+		ROUND(s1, s2, s0, X7, scale); \
+	} while (0)
+
+#define ROUND(a, b, c, x, mul)   do { /* one round: updates a, b, c in place using word x */ \
+		c ^= x; /* fold the word into c before the table lookups */ \
+		a = SPH_T64(a - (T1[c & 0xFF] ^ T2[(c >> 16) & 0xFF] \
+			^ T3[(c >> 32) & 0xFF] ^ T4[(c >> 48) & 0xFF])); /* bytes 0, 2, 4, 6 of c */ \
+		b = SPH_T64(b + (T4[(c >> 8) & 0xFF] ^ T3[(c >> 24) & 0xFF] \
+			^ T2[(c >> 40) & 0xFF] ^ T1[(c >> 56) & 0xFF])); /* bytes 1, 3, 5, 7 of c */ \
+		b = mul(b); /* mul is MUL5, MUL7 or MUL9, chosen by the caller of PASS */ \
+	} while (0)
+
+#define MUL5(v)   SPH_T64(SPH_C64(5) * (v))   /* scaling used by the first PASS of TIGER_ROUND_BODY */
+#define MUL7(v)   SPH_T64(SPH_C64(7) * (v))   /* scaling used by the second PASS */
+#define MUL9(v)   SPH_T64(SPH_C64(9) * (v))   /* scaling used by the third PASS */
+
+#define KSCHED   do { /* rewrites X0..X7 in place; TIGER_ROUND_BODY runs it between passes */ \
+		X0 = SPH_T64(X0 - (X7 ^ SPH_C64(0xA5A5A5A5A5A5A5A5))); /* first sweep over X0..X7 */ \
+		X1 ^= X0; \
+		X2 = SPH_T64(X2 + X1); \
+		X3 = SPH_T64(X3 - (X2 ^ (~X1 << 19))); \
+		X4 ^= X3; \
+		X5 = SPH_T64(X5 + X4); \
+		X6 = SPH_T64(X6 - (X5 ^ (~X4 >> 23))); \
+		X7 ^= X6; \
+		X0 = SPH_T64(X0 + X7); /* second sweep; each step reads the word updated just before it */ \
+		X1 = SPH_T64(X1 - (X0 ^ (~X7 << 19))); \
+		X2 ^= X1; \
+		X3 = SPH_T64(X3 + X2); \
+		X4 = SPH_T64(X4 - (X3 ^ (~X2 >> 23))); \
+		X5 ^= X4; \
+		X6 = SPH_T64(X6 + X5); \
+		X7 = SPH_T64(X7 - (X6 ^ SPH_C64(0x0123456789ABCDEF))); \
+	} while (0)
+
+#define TIGER_ROUND_BODY(in, r)   do { /* in(k): k-th 64-bit input word, k = 0..7; r: 3-word state, updated in place */ \
+		/* A, B, C hold the working state; X0..X7 hold the eight input words. */ \
+		sph_u64 A, B, C; \
+		sph_u64 X0, X1, X2, X3, X4, X5, X6, X7; \
+ \
+		A = (r)[0]; \
+		B = (r)[1]; \
+		C = (r)[2]; \
+ \
+		X0 = (in(0)); \
+		X1 = (in(1)); \
+		X2 = (in(2)); \
+		X3 = (in(3)); \
+		X4 = (in(4)); \
+		X5 = (in(5)); \
+		X6 = (in(6)); \
+		X7 = (in(7)); \
+		PASS(A, B, C, MUL5); /* three passes; KSCHED rewrites X0..X7 between them */ \
+		KSCHED; \
+		PASS(C, A, B, MUL7); \
+		KSCHED; \
+		PASS(B, C, A, MUL9); \
+ \
+		(r)[0] ^= A; /* combine the pass output with the incoming state */ \
+		(r)[1] = SPH_T64(B - (r)[1]); \
+		(r)[2] = SPH_T64(C + (r)[2]); \
+	} while (0)
+
+/*
+ * Run TIGER_ROUND_BODY over the eight 64-bit words read from data,
+ * updating r in place. The data must be aligned for 64-bit access.
+ */
+static void tiger_round(const unsigned char *data, sph_u64 r[3])
+{
+#define TIGER_LOAD(k)   sph_dec64le_aligned((data) + 8 * (k))
+	TIGER_ROUND_BODY(TIGER_LOAD, r);
+#undef TIGER_LOAD
+}
+
+/* see sph_tiger.h */
+void
+sph_tiger_init(void *cc)
+{
+	sph_tiger_context *const ctx = cc;
+
+	/* Reset the count field and load the three fixed initial state words. */
+	ctx->count = 0;
+	ctx->val[0] = SPH_C64(0x0123456789ABCDEF);
+	ctx->val[1] = SPH_C64(0xFEDCBA9876543210);
+	ctx->val[2] = SPH_C64(0xF096A5B4C3B2E187);
+}
+
+#define RFUN   tiger_round   /* NOTE(review): presumably the per-block function md_helper.c calls; confirm there */
+#define HASH   tiger         /* NOTE(review): presumably the name prefix; tiger_close() is called below but not defined in this file */
+#define LE64   1             /* NOTE(review): meaning defined by md_helper.c; presumably selects 64-bit little-endian handling */
+#define BLEN   64U           /* NOTE(review): presumably the block length in bytes (tiger_round reads 8 words of 8 bytes); confirm */
+#define PW01   1             /* defined for this instantiation, #undef'd before the tiger2 one; meaning defined by md_helper.c */
+#define PLW1   1             /* meaning defined by md_helper.c, which is not visible here */
+#include "md_helper.c"
+
+/* see sph_tiger.h: delegates to tiger_close(), then reinitializes the context */
+void
+sph_tiger_close(void *cc, void *dst)
+{
+	tiger_close(cc, dst, 3); /* NOTE(review): 3 is presumably the output length in 64-bit words; confirm in md_helper.c */
+	sph_tiger_init(cc); /* reset val[] and count so cc can be used again */
+}
+
+/* see sph_tiger.h: runs TIGER_ROUND_BODY directly on the words msg[0..7], updating val */
+void
+sph_tiger_comp(const sph_u64 msg[8], sph_u64 val[3])
+{
+#define TIGER_WORD(k)   (msg)[(k)]
+	TIGER_ROUND_BODY(TIGER_WORD, val);
+#undef TIGER_WORD
+}
+
+#undef HASH
+#define HASH   tiger2        /* second inclusion of md_helper.c; tiger2_close() is called below but not defined in this file */
+#undef PW01                  /* left undefined for the tiger2 instantiation; meaning defined by md_helper.c */
+#define CLOSE_ONLY   1       /* NOTE(review): presumably restricts md_helper.c to emitting the close routine; confirm there */
+#include "md_helper.c"
+
+/* see sph_tiger.h: delegates to tiger2_close(), then reinitializes the context */
+void
+sph_tiger2_close(void *cc, void *dst)
+{
+	tiger2_close(cc, dst, 3); /* NOTE(review): 3 is presumably the output length in 64-bit words; confirm in md_helper.c */
+	sph_tiger2_init(cc); /* not defined in this file; presumably declared or aliased in sph_tiger.h */
+}
+
+#endif
diff --git a/sph/whirlpool.c b/sph/whirlpool.c
new file mode 100644
index 0000000000..ee13e4c869
--- /dev/null
+++ b/sph/whirlpool.c
@@ -0,0 +1,3476 @@
+/* $Id: whirlpool.c 227 2010-06-16 17:28:38Z tp $ */
+/*
+ * WHIRLPOOL implementation.
+ *
+ * Internally, we use little-endian convention, on the assumption that
+ * architectures which favour big-endian encoding are:
+ *   1. rarer
+ *   2. in decreasing numbers
+ *   3. able to decode little-endian data efficiently anyway
+ *
+ * The most common big-endian architecture is Sparc, and Ultrasparc CPUs
+ * include special opcodes to perform little-endian accesses, which we use
+ * (see sph_types.h). Most modern CPU designs can work with either endianness
+ * and architecture designers now favour little-endian (basically, x86 has
+ * won the endianness war).
+ *
+ * TODO: implement a 32-bit version. Not only would such a version be handy
+ * for architectures without 64-bit support, but it could also use smaller
+ * tables, at the expense of more lookups and XORs.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#include <stddef.h>
+#include <string.h>
+
+#include "sph_whirlpool.h"
+
+#if SPH_64
+
+#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_WHIRLPOOL
+#define SPH_SMALL_FOOTPRINT_WHIRLPOOL   1
+#endif
+
+/* ====================================================================== */
+/*
+ * Constants for plain WHIRLPOOL (current version).
+ */
+
+static const sph_u64 plain_T0[256] = {
+	SPH_C64(0xD83078C018601818), SPH_C64(0x2646AF05238C2323),
+	SPH_C64(0xB891F97EC63FC6C6), SPH_C64(0xFBCD6F13E887E8E8),
+	SPH_C64(0xCB13A14C87268787), SPH_C64(0x116D62A9B8DAB8B8),
+	SPH_C64(0x0902050801040101), SPH_C64(0x0D9E6E424F214F4F),
+	SPH_C64(0x9B6CEEAD36D83636), SPH_C64(0xFF510459A6A2A6A6),
+	SPH_C64(0x0CB9BDDED26FD2D2), SPH_C64(0x0EF706FBF5F3F5F5),
+	SPH_C64(0x96F280EF79F97979), SPH_C64(0x30DECE5F6FA16F6F),
+	SPH_C64(0x6D3FEFFC917E9191), SPH_C64(0xF8A407AA52555252),
+	SPH_C64(0x47C0FD27609D6060), SPH_C64(0x35657689BCCABCBC),
+	SPH_C64(0x372BCDAC9B569B9B), SPH_C64(0x8A018C048E028E8E),
+	SPH_C64(0xD25B1571A3B6A3A3), SPH_C64(0x6C183C600C300C0C),
+	SPH_C64(0x84F68AFF7BF17B7B), SPH_C64(0x806AE1B535D43535),
+	SPH_C64(0xF53A69E81D741D1D), SPH_C64(0xB3DD4753E0A7E0E0),
+	SPH_C64(0x21B3ACF6D77BD7D7), SPH_C64(0x9C99ED5EC22FC2C2),
+	SPH_C64(0x435C966D2EB82E2E), SPH_C64(0x29967A624B314B4B),
+	SPH_C64(0x5DE121A3FEDFFEFE), SPH_C64(0xD5AE168257415757),
+	SPH_C64(0xBD2A41A815541515), SPH_C64(0xE8EEB69F77C17777),
+	SPH_C64(0x926EEBA537DC3737), SPH_C64(0x9ED7567BE5B3E5E5),
+	SPH_C64(0x1323D98C9F469F9F), SPH_C64(0x23FD17D3F0E7F0F0),
+	SPH_C64(0x20947F6A4A354A4A), SPH_C64(0x44A9959EDA4FDADA),
+	SPH_C64(0xA2B025FA587D5858), SPH_C64(0xCF8FCA06C903C9C9),
+	SPH_C64(0x7C528D5529A42929), SPH_C64(0x5A1422500A280A0A),
+	SPH_C64(0x507F4FE1B1FEB1B1), SPH_C64(0xC95D1A69A0BAA0A0),
+	SPH_C64(0x14D6DA7F6BB16B6B), SPH_C64(0xD917AB5C852E8585),
+	SPH_C64(0x3C677381BDCEBDBD), SPH_C64(0x8FBA34D25D695D5D),
+	SPH_C64(0x9020508010401010), SPH_C64(0x07F503F3F4F7F4F4),
+	SPH_C64(0xDD8BC016CB0BCBCB), SPH_C64(0xD37CC6ED3EF83E3E),
+	SPH_C64(0x2D0A112805140505), SPH_C64(0x78CEE61F67816767),
+	SPH_C64(0x97D55373E4B7E4E4), SPH_C64(0x024EBB25279C2727),
+	SPH_C64(0x7382583241194141), SPH_C64(0xA70B9D2C8B168B8B),
+	SPH_C64(0xF6530151A7A6A7A7), SPH_C64(0xB2FA94CF7DE97D7D),
+	SPH_C64(0x4937FBDC956E9595), SPH_C64(0x56AD9F8ED847D8D8),
+	SPH_C64(0x70EB308BFBCBFBFB), SPH_C64(0xCDC17123EE9FEEEE),
+	SPH_C64(0xBBF891C77CED7C7C), SPH_C64(0x71CCE31766856666),
+	SPH_C64(0x7BA78EA6DD53DDDD), SPH_C64(0xAF2E4BB8175C1717),
+	SPH_C64(0x458E460247014747), SPH_C64(0x1A21DC849E429E9E),
+	SPH_C64(0xD489C51ECA0FCACA), SPH_C64(0x585A99752DB42D2D),
+	SPH_C64(0x2E637991BFC6BFBF), SPH_C64(0x3F0E1B38071C0707),
+	SPH_C64(0xAC472301AD8EADAD), SPH_C64(0xB0B42FEA5A755A5A),
+	SPH_C64(0xEF1BB56C83368383), SPH_C64(0xB666FF8533CC3333),
+	SPH_C64(0x5CC6F23F63916363), SPH_C64(0x12040A1002080202),
+	SPH_C64(0x93493839AA92AAAA), SPH_C64(0xDEE2A8AF71D97171),
+	SPH_C64(0xC68DCF0EC807C8C8), SPH_C64(0xD1327DC819641919),
+	SPH_C64(0x3B92707249394949), SPH_C64(0x5FAF9A86D943D9D9),
+	SPH_C64(0x31F91DC3F2EFF2F2), SPH_C64(0xA8DB484BE3ABE3E3),
+	SPH_C64(0xB9B62AE25B715B5B), SPH_C64(0xBC0D9234881A8888),
+	SPH_C64(0x3E29C8A49A529A9A), SPH_C64(0x0B4CBE2D26982626),
+	SPH_C64(0xBF64FA8D32C83232), SPH_C64(0x597D4AE9B0FAB0B0),
+	SPH_C64(0xF2CF6A1BE983E9E9), SPH_C64(0x771E33780F3C0F0F),
+	SPH_C64(0x33B7A6E6D573D5D5), SPH_C64(0xF41DBA74803A8080),
+	SPH_C64(0x27617C99BEC2BEBE), SPH_C64(0xEB87DE26CD13CDCD),
+	SPH_C64(0x8968E4BD34D03434), SPH_C64(0x3290757A483D4848),
+	SPH_C64(0x54E324ABFFDBFFFF), SPH_C64(0x8DF48FF77AF57A7A),
+	SPH_C64(0x643DEAF4907A9090), SPH_C64(0x9DBE3EC25F615F5F),
+	SPH_C64(0x3D40A01D20802020), SPH_C64(0x0FD0D56768BD6868),
+	SPH_C64(0xCA3472D01A681A1A), SPH_C64(0xB7412C19AE82AEAE),
+	SPH_C64(0x7D755EC9B4EAB4B4), SPH_C64(0xCEA8199A544D5454),
+	SPH_C64(0x7F3BE5EC93769393), SPH_C64(0x2F44AA0D22882222),
+	SPH_C64(0x63C8E907648D6464), SPH_C64(0x2AFF12DBF1E3F1F1),
+	SPH_C64(0xCCE6A2BF73D17373), SPH_C64(0x82245A9012481212),
+	SPH_C64(0x7A805D3A401D4040), SPH_C64(0x4810284008200808),
+	SPH_C64(0x959BE856C32BC3C3), SPH_C64(0xDFC57B33EC97ECEC),
+	SPH_C64(0x4DAB9096DB4BDBDB), SPH_C64(0xC05F1F61A1BEA1A1),
+	SPH_C64(0x9107831C8D0E8D8D), SPH_C64(0xC87AC9F53DF43D3D),
+	SPH_C64(0x5B33F1CC97669797), SPH_C64(0x0000000000000000),
+	SPH_C64(0xF983D436CF1BCFCF), SPH_C64(0x6E5687452BAC2B2B),
+	SPH_C64(0xE1ECB39776C57676), SPH_C64(0xE619B06482328282),
+	SPH_C64(0x28B1A9FED67FD6D6), SPH_C64(0xC33677D81B6C1B1B),
+	SPH_C64(0x74775BC1B5EEB5B5), SPH_C64(0xBE432911AF86AFAF),
+	SPH_C64(0x1DD4DF776AB56A6A), SPH_C64(0xEAA00DBA505D5050),
+	SPH_C64(0x578A4C1245094545), SPH_C64(0x38FB18CBF3EBF3F3),
+	SPH_C64(0xAD60F09D30C03030), SPH_C64(0xC4C3742BEF9BEFEF),
+	SPH_C64(0xDA7EC3E53FFC3F3F), SPH_C64(0xC7AA1C9255495555),
+	SPH_C64(0xDB591079A2B2A2A2), SPH_C64(0xE9C96503EA8FEAEA),
+	SPH_C64(0x6ACAEC0F65896565), SPH_C64(0x036968B9BAD2BABA),
+	SPH_C64(0x4A5E93652FBC2F2F), SPH_C64(0x8E9DE74EC027C0C0),
+	SPH_C64(0x60A181BEDE5FDEDE), SPH_C64(0xFC386CE01C701C1C),
+	SPH_C64(0x46E72EBBFDD3FDFD), SPH_C64(0x1F9A64524D294D4D),
+	SPH_C64(0x7639E0E492729292), SPH_C64(0xFAEABC8F75C97575),
+	SPH_C64(0x360C1E3006180606), SPH_C64(0xAE0998248A128A8A),
+	SPH_C64(0x4B7940F9B2F2B2B2), SPH_C64(0x85D15963E6BFE6E6),
+	SPH_C64(0x7E1C36700E380E0E), SPH_C64(0xE73E63F81F7C1F1F),
+	SPH_C64(0x55C4F73762956262), SPH_C64(0x3AB5A3EED477D4D4),
+	SPH_C64(0x814D3229A89AA8A8), SPH_C64(0x5231F4C496629696),
+	SPH_C64(0x62EF3A9BF9C3F9F9), SPH_C64(0xA397F666C533C5C5),
+	SPH_C64(0x104AB13525942525), SPH_C64(0xABB220F259795959),
+	SPH_C64(0xD015AE54842A8484), SPH_C64(0xC5E4A7B772D57272),
+	SPH_C64(0xEC72DDD539E43939), SPH_C64(0x1698615A4C2D4C4C),
+	SPH_C64(0x94BC3BCA5E655E5E), SPH_C64(0x9FF085E778FD7878),
+	SPH_C64(0xE570D8DD38E03838), SPH_C64(0x980586148C0A8C8C),
+	SPH_C64(0x17BFB2C6D163D1D1), SPH_C64(0xE4570B41A5AEA5A5),
+	SPH_C64(0xA1D94D43E2AFE2E2), SPH_C64(0x4EC2F82F61996161),
+	SPH_C64(0x427B45F1B3F6B3B3), SPH_C64(0x3442A51521842121),
+	SPH_C64(0x0825D6949C4A9C9C), SPH_C64(0xEE3C66F01E781E1E),
+	SPH_C64(0x6186522243114343), SPH_C64(0xB193FC76C73BC7C7),
+	SPH_C64(0x4FE52BB3FCD7FCFC), SPH_C64(0x2408142004100404),
+	SPH_C64(0xE3A208B251595151), SPH_C64(0x252FC7BC995E9999),
+	SPH_C64(0x22DAC44F6DA96D6D), SPH_C64(0x651A39680D340D0D),
+	SPH_C64(0x79E93583FACFFAFA), SPH_C64(0x69A384B6DF5BDFDF),
+	SPH_C64(0xA9FC9BD77EE57E7E), SPH_C64(0x1948B43D24902424),
+	SPH_C64(0xFE76D7C53BEC3B3B), SPH_C64(0x9A4B3D31AB96ABAB),
+	SPH_C64(0xF081D13ECE1FCECE), SPH_C64(0x9922558811441111),
+	SPH_C64(0x8303890C8F068F8F), SPH_C64(0x049C6B4A4E254E4E),
+	SPH_C64(0x667351D1B7E6B7B7), SPH_C64(0xE0CB600BEB8BEBEB),
+	SPH_C64(0xC178CCFD3CF03C3C), SPH_C64(0xFD1FBF7C813E8181),
+	SPH_C64(0x4035FED4946A9494), SPH_C64(0x1CF30CEBF7FBF7F7),
+	SPH_C64(0x186F67A1B9DEB9B9), SPH_C64(0x8B265F98134C1313),
+	SPH_C64(0x51589C7D2CB02C2C), SPH_C64(0x05BBB8D6D36BD3D3),
+	SPH_C64(0x8CD35C6BE7BBE7E7), SPH_C64(0x39DCCB576EA56E6E),
+	SPH_C64(0xAA95F36EC437C4C4), SPH_C64(0x1B060F18030C0303),
+	SPH_C64(0xDCAC138A56455656), SPH_C64(0x5E88491A440D4444),
+	SPH_C64(0xA0FE9EDF7FE17F7F), SPH_C64(0x884F3721A99EA9A9),
+	SPH_C64(0x6754824D2AA82A2A), SPH_C64(0x0A6B6DB1BBD6BBBB),
+	SPH_C64(0x879FE246C123C1C1), SPH_C64(0xF1A602A253515353),
+	SPH_C64(0x72A58BAEDC57DCDC), SPH_C64(0x531627580B2C0B0B),
+	SPH_C64(0x0127D39C9D4E9D9D), SPH_C64(0x2BD8C1476CAD6C6C),
+	SPH_C64(0xA462F59531C43131), SPH_C64(0xF3E8B98774CD7474),
+	SPH_C64(0x15F109E3F6FFF6F6), SPH_C64(0x4C8C430A46054646),
+	SPH_C64(0xA5452609AC8AACAC), SPH_C64(0xB50F973C891E8989),
+	SPH_C64(0xB42844A014501414), SPH_C64(0xBADF425BE1A3E1E1),
+	SPH_C64(0xA62C4EB016581616), SPH_C64(0xF774D2CD3AE83A3A),
+	SPH_C64(0x06D2D06F69B96969), SPH_C64(0x41122D4809240909),
+	SPH_C64(0xD7E0ADA770DD7070), SPH_C64(0x6F7154D9B6E2B6B6),
+	SPH_C64(0x1EBDB7CED067D0D0), SPH_C64(0xD6C77E3BED93EDED),
+	SPH_C64(0xE285DB2ECC17CCCC), SPH_C64(0x6884572A42154242),
+	SPH_C64(0x2C2DC2B4985A9898), SPH_C64(0xED550E49A4AAA4A4),
+	SPH_C64(0x7550885D28A02828), SPH_C64(0x86B831DA5C6D5C5C),
+	SPH_C64(0x6BED3F93F8C7F8F8), SPH_C64(0xC211A44486228686)
+};
+
+#if !SPH_SMALL_FOOTPRINT_WHIRLPOOL
+
+static const sph_u64 plain_T1[256] = {
+	SPH_C64(0x3078C018601818D8), SPH_C64(0x46AF05238C232326),
+	SPH_C64(0x91F97EC63FC6C6B8), SPH_C64(0xCD6F13E887E8E8FB),
+	SPH_C64(0x13A14C87268787CB), SPH_C64(0x6D62A9B8DAB8B811),
+	SPH_C64(0x0205080104010109), SPH_C64(0x9E6E424F214F4F0D),
+	SPH_C64(0x6CEEAD36D836369B), SPH_C64(0x510459A6A2A6A6FF),
+	SPH_C64(0xB9BDDED26FD2D20C), SPH_C64(0xF706FBF5F3F5F50E),
+	SPH_C64(0xF280EF79F9797996), SPH_C64(0xDECE5F6FA16F6F30),
+	SPH_C64(0x3FEFFC917E91916D), SPH_C64(0xA407AA52555252F8),
+	SPH_C64(0xC0FD27609D606047), SPH_C64(0x657689BCCABCBC35),
+	SPH_C64(0x2BCDAC9B569B9B37), SPH_C64(0x018C048E028E8E8A),
+	SPH_C64(0x5B1571A3B6A3A3D2), SPH_C64(0x183C600C300C0C6C),
+	SPH_C64(0xF68AFF7BF17B7B84), SPH_C64(0x6AE1B535D4353580),
+	SPH_C64(0x3A69E81D741D1DF5), SPH_C64(0xDD4753E0A7E0E0B3),
+	SPH_C64(0xB3ACF6D77BD7D721), SPH_C64(0x99ED5EC22FC2C29C),
+	SPH_C64(0x5C966D2EB82E2E43), SPH_C64(0x967A624B314B4B29),
+	SPH_C64(0xE121A3FEDFFEFE5D), SPH_C64(0xAE168257415757D5),
+	SPH_C64(0x2A41A815541515BD), SPH_C64(0xEEB69F77C17777E8),
+	SPH_C64(0x6EEBA537DC373792), SPH_C64(0xD7567BE5B3E5E59E),
+	SPH_C64(0x23D98C9F469F9F13), SPH_C64(0xFD17D3F0E7F0F023),
+	SPH_C64(0x947F6A4A354A4A20), SPH_C64(0xA9959EDA4FDADA44),
+	SPH_C64(0xB025FA587D5858A2), SPH_C64(0x8FCA06C903C9C9CF),
+	SPH_C64(0x528D5529A429297C), SPH_C64(0x1422500A280A0A5A),
+	SPH_C64(0x7F4FE1B1FEB1B150), SPH_C64(0x5D1A69A0BAA0A0C9),
+	SPH_C64(0xD6DA7F6BB16B6B14), SPH_C64(0x17AB5C852E8585D9),
+	SPH_C64(0x677381BDCEBDBD3C), SPH_C64(0xBA34D25D695D5D8F),
+	SPH_C64(0x2050801040101090), SPH_C64(0xF503F3F4F7F4F407),
+	SPH_C64(0x8BC016CB0BCBCBDD), SPH_C64(0x7CC6ED3EF83E3ED3),
+	SPH_C64(0x0A1128051405052D), SPH_C64(0xCEE61F6781676778),
+	SPH_C64(0xD55373E4B7E4E497), SPH_C64(0x4EBB25279C272702),
+	SPH_C64(0x8258324119414173), SPH_C64(0x0B9D2C8B168B8BA7),
+	SPH_C64(0x530151A7A6A7A7F6), SPH_C64(0xFA94CF7DE97D7DB2),
+	SPH_C64(0x37FBDC956E959549), SPH_C64(0xAD9F8ED847D8D856),
+	SPH_C64(0xEB308BFBCBFBFB70), SPH_C64(0xC17123EE9FEEEECD),
+	SPH_C64(0xF891C77CED7C7CBB), SPH_C64(0xCCE3176685666671),
+	SPH_C64(0xA78EA6DD53DDDD7B), SPH_C64(0x2E4BB8175C1717AF),
+	SPH_C64(0x8E46024701474745), SPH_C64(0x21DC849E429E9E1A),
+	SPH_C64(0x89C51ECA0FCACAD4), SPH_C64(0x5A99752DB42D2D58),
+	SPH_C64(0x637991BFC6BFBF2E), SPH_C64(0x0E1B38071C07073F),
+	SPH_C64(0x472301AD8EADADAC), SPH_C64(0xB42FEA5A755A5AB0),
+	SPH_C64(0x1BB56C83368383EF), SPH_C64(0x66FF8533CC3333B6),
+	SPH_C64(0xC6F23F639163635C), SPH_C64(0x040A100208020212),
+	SPH_C64(0x493839AA92AAAA93), SPH_C64(0xE2A8AF71D97171DE),
+	SPH_C64(0x8DCF0EC807C8C8C6), SPH_C64(0x327DC819641919D1),
+	SPH_C64(0x927072493949493B), SPH_C64(0xAF9A86D943D9D95F),
+	SPH_C64(0xF91DC3F2EFF2F231), SPH_C64(0xDB484BE3ABE3E3A8),
+	SPH_C64(0xB62AE25B715B5BB9), SPH_C64(0x0D9234881A8888BC),
+	SPH_C64(0x29C8A49A529A9A3E), SPH_C64(0x4CBE2D269826260B),
+	SPH_C64(0x64FA8D32C83232BF), SPH_C64(0x7D4AE9B0FAB0B059),
+	SPH_C64(0xCF6A1BE983E9E9F2), SPH_C64(0x1E33780F3C0F0F77),
+	SPH_C64(0xB7A6E6D573D5D533), SPH_C64(0x1DBA74803A8080F4),
+	SPH_C64(0x617C99BEC2BEBE27), SPH_C64(0x87DE26CD13CDCDEB),
+	SPH_C64(0x68E4BD34D0343489), SPH_C64(0x90757A483D484832),
+	SPH_C64(0xE324ABFFDBFFFF54), SPH_C64(0xF48FF77AF57A7A8D),
+	SPH_C64(0x3DEAF4907A909064), SPH_C64(0xBE3EC25F615F5F9D),
+	SPH_C64(0x40A01D208020203D), SPH_C64(0xD0D56768BD68680F),
+	SPH_C64(0x3472D01A681A1ACA), SPH_C64(0x412C19AE82AEAEB7),
+	SPH_C64(0x755EC9B4EAB4B47D), SPH_C64(0xA8199A544D5454CE),
+	SPH_C64(0x3BE5EC937693937F), SPH_C64(0x44AA0D228822222F),
+	SPH_C64(0xC8E907648D646463), SPH_C64(0xFF12DBF1E3F1F12A),
+	SPH_C64(0xE6A2BF73D17373CC), SPH_C64(0x245A901248121282),
+	SPH_C64(0x805D3A401D40407A), SPH_C64(0x1028400820080848),
+	SPH_C64(0x9BE856C32BC3C395), SPH_C64(0xC57B33EC97ECECDF),
+	SPH_C64(0xAB9096DB4BDBDB4D), SPH_C64(0x5F1F61A1BEA1A1C0),
+	SPH_C64(0x07831C8D0E8D8D91), SPH_C64(0x7AC9F53DF43D3DC8),
+	SPH_C64(0x33F1CC976697975B), SPH_C64(0x0000000000000000),
+	SPH_C64(0x83D436CF1BCFCFF9), SPH_C64(0x5687452BAC2B2B6E),
+	SPH_C64(0xECB39776C57676E1), SPH_C64(0x19B06482328282E6),
+	SPH_C64(0xB1A9FED67FD6D628), SPH_C64(0x3677D81B6C1B1BC3),
+	SPH_C64(0x775BC1B5EEB5B574), SPH_C64(0x432911AF86AFAFBE),
+	SPH_C64(0xD4DF776AB56A6A1D), SPH_C64(0xA00DBA505D5050EA),
+	SPH_C64(0x8A4C124509454557), SPH_C64(0xFB18CBF3EBF3F338),
+	SPH_C64(0x60F09D30C03030AD), SPH_C64(0xC3742BEF9BEFEFC4),
+	SPH_C64(0x7EC3E53FFC3F3FDA), SPH_C64(0xAA1C9255495555C7),
+	SPH_C64(0x591079A2B2A2A2DB), SPH_C64(0xC96503EA8FEAEAE9),
+	SPH_C64(0xCAEC0F658965656A), SPH_C64(0x6968B9BAD2BABA03),
+	SPH_C64(0x5E93652FBC2F2F4A), SPH_C64(0x9DE74EC027C0C08E),
+	SPH_C64(0xA181BEDE5FDEDE60), SPH_C64(0x386CE01C701C1CFC),
+	SPH_C64(0xE72EBBFDD3FDFD46), SPH_C64(0x9A64524D294D4D1F),
+	SPH_C64(0x39E0E49272929276), SPH_C64(0xEABC8F75C97575FA),
+	SPH_C64(0x0C1E300618060636), SPH_C64(0x0998248A128A8AAE),
+	SPH_C64(0x7940F9B2F2B2B24B), SPH_C64(0xD15963E6BFE6E685),
+	SPH_C64(0x1C36700E380E0E7E), SPH_C64(0x3E63F81F7C1F1FE7),
+	SPH_C64(0xC4F7376295626255), SPH_C64(0xB5A3EED477D4D43A),
+	SPH_C64(0x4D3229A89AA8A881), SPH_C64(0x31F4C49662969652),
+	SPH_C64(0xEF3A9BF9C3F9F962), SPH_C64(0x97F666C533C5C5A3),
+	SPH_C64(0x4AB1352594252510), SPH_C64(0xB220F259795959AB),
+	SPH_C64(0x15AE54842A8484D0), SPH_C64(0xE4A7B772D57272C5),
+	SPH_C64(0x72DDD539E43939EC), SPH_C64(0x98615A4C2D4C4C16),
+	SPH_C64(0xBC3BCA5E655E5E94), SPH_C64(0xF085E778FD78789F),
+	SPH_C64(0x70D8DD38E03838E5), SPH_C64(0x0586148C0A8C8C98),
+	SPH_C64(0xBFB2C6D163D1D117), SPH_C64(0x570B41A5AEA5A5E4),
+	SPH_C64(0xD94D43E2AFE2E2A1), SPH_C64(0xC2F82F619961614E),
+	SPH_C64(0x7B45F1B3F6B3B342), SPH_C64(0x42A5152184212134),
+	SPH_C64(0x25D6949C4A9C9C08), SPH_C64(0x3C66F01E781E1EEE),
+	SPH_C64(0x8652224311434361), SPH_C64(0x93FC76C73BC7C7B1),
+	SPH_C64(0xE52BB3FCD7FCFC4F), SPH_C64(0x0814200410040424),
+	SPH_C64(0xA208B251595151E3), SPH_C64(0x2FC7BC995E999925),
+	SPH_C64(0xDAC44F6DA96D6D22), SPH_C64(0x1A39680D340D0D65),
+	SPH_C64(0xE93583FACFFAFA79), SPH_C64(0xA384B6DF5BDFDF69),
+	SPH_C64(0xFC9BD77EE57E7EA9), SPH_C64(0x48B43D2490242419),
+	SPH_C64(0x76D7C53BEC3B3BFE), SPH_C64(0x4B3D31AB96ABAB9A),
+	SPH_C64(0x81D13ECE1FCECEF0), SPH_C64(0x2255881144111199),
+	SPH_C64(0x03890C8F068F8F83), SPH_C64(0x9C6B4A4E254E4E04),
+	SPH_C64(0x7351D1B7E6B7B766), SPH_C64(0xCB600BEB8BEBEBE0),
+	SPH_C64(0x78CCFD3CF03C3CC1), SPH_C64(0x1FBF7C813E8181FD),
+	SPH_C64(0x35FED4946A949440), SPH_C64(0xF30CEBF7FBF7F71C),
+	SPH_C64(0x6F67A1B9DEB9B918), SPH_C64(0x265F98134C13138B),
+	SPH_C64(0x589C7D2CB02C2C51), SPH_C64(0xBBB8D6D36BD3D305),
+	SPH_C64(0xD35C6BE7BBE7E78C), SPH_C64(0xDCCB576EA56E6E39),
+	SPH_C64(0x95F36EC437C4C4AA), SPH_C64(0x060F18030C03031B),
+	SPH_C64(0xAC138A56455656DC), SPH_C64(0x88491A440D44445E),
+	SPH_C64(0xFE9EDF7FE17F7FA0), SPH_C64(0x4F3721A99EA9A988),
+	SPH_C64(0x54824D2AA82A2A67), SPH_C64(0x6B6DB1BBD6BBBB0A),
+	SPH_C64(0x9FE246C123C1C187), SPH_C64(0xA602A253515353F1),
+	SPH_C64(0xA58BAEDC57DCDC72), SPH_C64(0x1627580B2C0B0B53),
+	SPH_C64(0x27D39C9D4E9D9D01), SPH_C64(0xD8C1476CAD6C6C2B),
+	SPH_C64(0x62F59531C43131A4), SPH_C64(0xE8B98774CD7474F3),
+	SPH_C64(0xF109E3F6FFF6F615), SPH_C64(0x8C430A460546464C),
+	SPH_C64(0x452609AC8AACACA5), SPH_C64(0x0F973C891E8989B5),
+	SPH_C64(0x2844A014501414B4), SPH_C64(0xDF425BE1A3E1E1BA),
+	SPH_C64(0x2C4EB016581616A6), SPH_C64(0x74D2CD3AE83A3AF7),
+	SPH_C64(0xD2D06F69B9696906), SPH_C64(0x122D480924090941),
+	SPH_C64(0xE0ADA770DD7070D7), SPH_C64(0x7154D9B6E2B6B66F),
+	SPH_C64(0xBDB7CED067D0D01E), SPH_C64(0xC77E3BED93EDEDD6),
+	SPH_C64(0x85DB2ECC17CCCCE2), SPH_C64(0x84572A4215424268),
+	SPH_C64(0x2DC2B4985A98982C), SPH_C64(0x550E49A4AAA4A4ED),
+	SPH_C64(0x50885D28A0282875), SPH_C64(0xB831DA5C6D5C5C86),
+	SPH_C64(0xED3F93F8C7F8F86B), SPH_C64(0x11A44486228686C2)
+};
+
+static const sph_u64 plain_T2[256] = {
+	SPH_C64(0x78C018601818D830), SPH_C64(0xAF05238C23232646),
+	SPH_C64(0xF97EC63FC6C6B891), SPH_C64(0x6F13E887E8E8FBCD),
+	SPH_C64(0xA14C87268787CB13), SPH_C64(0x62A9B8DAB8B8116D),
+	SPH_C64(0x0508010401010902), SPH_C64(0x6E424F214F4F0D9E),
+	SPH_C64(0xEEAD36D836369B6C), SPH_C64(0x0459A6A2A6A6FF51),
+	SPH_C64(0xBDDED26FD2D20CB9), SPH_C64(0x06FBF5F3F5F50EF7),
+	SPH_C64(0x80EF79F9797996F2), SPH_C64(0xCE5F6FA16F6F30DE),
+	SPH_C64(0xEFFC917E91916D3F), SPH_C64(0x07AA52555252F8A4),
+	SPH_C64(0xFD27609D606047C0), SPH_C64(0x7689BCCABCBC3565),
+	SPH_C64(0xCDAC9B569B9B372B), SPH_C64(0x8C048E028E8E8A01),
+	SPH_C64(0x1571A3B6A3A3D25B), SPH_C64(0x3C600C300C0C6C18),
+	SPH_C64(0x8AFF7BF17B7B84F6), SPH_C64(0xE1B535D43535806A),
+	SPH_C64(0x69E81D741D1DF53A), SPH_C64(0x4753E0A7E0E0B3DD),
+	SPH_C64(0xACF6D77BD7D721B3), SPH_C64(0xED5EC22FC2C29C99),
+	SPH_C64(0x966D2EB82E2E435C), SPH_C64(0x7A624B314B4B2996),
+	SPH_C64(0x21A3FEDFFEFE5DE1), SPH_C64(0x168257415757D5AE),
+	SPH_C64(0x41A815541515BD2A), SPH_C64(0xB69F77C17777E8EE),
+	SPH_C64(0xEBA537DC3737926E), SPH_C64(0x567BE5B3E5E59ED7),
+	SPH_C64(0xD98C9F469F9F1323), SPH_C64(0x17D3F0E7F0F023FD),
+	SPH_C64(0x7F6A4A354A4A2094), SPH_C64(0x959EDA4FDADA44A9),
+	SPH_C64(0x25FA587D5858A2B0), SPH_C64(0xCA06C903C9C9CF8F),
+	SPH_C64(0x8D5529A429297C52), SPH_C64(0x22500A280A0A5A14),
+	SPH_C64(0x4FE1B1FEB1B1507F), SPH_C64(0x1A69A0BAA0A0C95D),
+	SPH_C64(0xDA7F6BB16B6B14D6), SPH_C64(0xAB5C852E8585D917),
+	SPH_C64(0x7381BDCEBDBD3C67), SPH_C64(0x34D25D695D5D8FBA),
+	SPH_C64(0x5080104010109020), SPH_C64(0x03F3F4F7F4F407F5),
+	SPH_C64(0xC016CB0BCBCBDD8B), SPH_C64(0xC6ED3EF83E3ED37C),
+	SPH_C64(0x1128051405052D0A), SPH_C64(0xE61F6781676778CE),
+	SPH_C64(0x5373E4B7E4E497D5), SPH_C64(0xBB25279C2727024E),
+	SPH_C64(0x5832411941417382), SPH_C64(0x9D2C8B168B8BA70B),
+	SPH_C64(0x0151A7A6A7A7F653), SPH_C64(0x94CF7DE97D7DB2FA),
+	SPH_C64(0xFBDC956E95954937), SPH_C64(0x9F8ED847D8D856AD),
+	SPH_C64(0x308BFBCBFBFB70EB), SPH_C64(0x7123EE9FEEEECDC1),
+	SPH_C64(0x91C77CED7C7CBBF8), SPH_C64(0xE3176685666671CC),
+	SPH_C64(0x8EA6DD53DDDD7BA7), SPH_C64(0x4BB8175C1717AF2E),
+	SPH_C64(0x460247014747458E), SPH_C64(0xDC849E429E9E1A21),
+	SPH_C64(0xC51ECA0FCACAD489), SPH_C64(0x99752DB42D2D585A),
+	SPH_C64(0x7991BFC6BFBF2E63), SPH_C64(0x1B38071C07073F0E),
+	SPH_C64(0x2301AD8EADADAC47), SPH_C64(0x2FEA5A755A5AB0B4),
+	SPH_C64(0xB56C83368383EF1B), SPH_C64(0xFF8533CC3333B666),
+	SPH_C64(0xF23F639163635CC6), SPH_C64(0x0A10020802021204),
+	SPH_C64(0x3839AA92AAAA9349), SPH_C64(0xA8AF71D97171DEE2),
+	SPH_C64(0xCF0EC807C8C8C68D), SPH_C64(0x7DC819641919D132),
+	SPH_C64(0x7072493949493B92), SPH_C64(0x9A86D943D9D95FAF),
+	SPH_C64(0x1DC3F2EFF2F231F9), SPH_C64(0x484BE3ABE3E3A8DB),
+	SPH_C64(0x2AE25B715B5BB9B6), SPH_C64(0x9234881A8888BC0D),
+	SPH_C64(0xC8A49A529A9A3E29), SPH_C64(0xBE2D269826260B4C),
+	SPH_C64(0xFA8D32C83232BF64), SPH_C64(0x4AE9B0FAB0B0597D),
+	SPH_C64(0x6A1BE983E9E9F2CF), SPH_C64(0x33780F3C0F0F771E),
+	SPH_C64(0xA6E6D573D5D533B7), SPH_C64(0xBA74803A8080F41D),
+	SPH_C64(0x7C99BEC2BEBE2761), SPH_C64(0xDE26CD13CDCDEB87),
+	SPH_C64(0xE4BD34D034348968), SPH_C64(0x757A483D48483290),
+	SPH_C64(0x24ABFFDBFFFF54E3), SPH_C64(0x8FF77AF57A7A8DF4),
+	SPH_C64(0xEAF4907A9090643D), SPH_C64(0x3EC25F615F5F9DBE),
+	SPH_C64(0xA01D208020203D40), SPH_C64(0xD56768BD68680FD0),
+	SPH_C64(0x72D01A681A1ACA34), SPH_C64(0x2C19AE82AEAEB741),
+	SPH_C64(0x5EC9B4EAB4B47D75), SPH_C64(0x199A544D5454CEA8),
+	SPH_C64(0xE5EC937693937F3B), SPH_C64(0xAA0D228822222F44),
+	SPH_C64(0xE907648D646463C8), SPH_C64(0x12DBF1E3F1F12AFF),
+	SPH_C64(0xA2BF73D17373CCE6), SPH_C64(0x5A90124812128224),
+	SPH_C64(0x5D3A401D40407A80), SPH_C64(0x2840082008084810),
+	SPH_C64(0xE856C32BC3C3959B), SPH_C64(0x7B33EC97ECECDFC5),
+	SPH_C64(0x9096DB4BDBDB4DAB), SPH_C64(0x1F61A1BEA1A1C05F),
+	SPH_C64(0x831C8D0E8D8D9107), SPH_C64(0xC9F53DF43D3DC87A),
+	SPH_C64(0xF1CC976697975B33), SPH_C64(0x0000000000000000),
+	SPH_C64(0xD436CF1BCFCFF983), SPH_C64(0x87452BAC2B2B6E56),
+	SPH_C64(0xB39776C57676E1EC), SPH_C64(0xB06482328282E619),
+	SPH_C64(0xA9FED67FD6D628B1), SPH_C64(0x77D81B6C1B1BC336),
+	SPH_C64(0x5BC1B5EEB5B57477), SPH_C64(0x2911AF86AFAFBE43),
+	SPH_C64(0xDF776AB56A6A1DD4), SPH_C64(0x0DBA505D5050EAA0),
+	SPH_C64(0x4C1245094545578A), SPH_C64(0x18CBF3EBF3F338FB),
+	SPH_C64(0xF09D30C03030AD60), SPH_C64(0x742BEF9BEFEFC4C3),
+	SPH_C64(0xC3E53FFC3F3FDA7E), SPH_C64(0x1C9255495555C7AA),
+	SPH_C64(0x1079A2B2A2A2DB59), SPH_C64(0x6503EA8FEAEAE9C9),
+	SPH_C64(0xEC0F658965656ACA), SPH_C64(0x68B9BAD2BABA0369),
+	SPH_C64(0x93652FBC2F2F4A5E), SPH_C64(0xE74EC027C0C08E9D),
+	SPH_C64(0x81BEDE5FDEDE60A1), SPH_C64(0x6CE01C701C1CFC38),
+	SPH_C64(0x2EBBFDD3FDFD46E7), SPH_C64(0x64524D294D4D1F9A),
+	SPH_C64(0xE0E4927292927639), SPH_C64(0xBC8F75C97575FAEA),
+	SPH_C64(0x1E3006180606360C), SPH_C64(0x98248A128A8AAE09),
+	SPH_C64(0x40F9B2F2B2B24B79), SPH_C64(0x5963E6BFE6E685D1),
+	SPH_C64(0x36700E380E0E7E1C), SPH_C64(0x63F81F7C1F1FE73E),
+	SPH_C64(0xF7376295626255C4), SPH_C64(0xA3EED477D4D43AB5),
+	SPH_C64(0x3229A89AA8A8814D), SPH_C64(0xF4C4966296965231),
+	SPH_C64(0x3A9BF9C3F9F962EF), SPH_C64(0xF666C533C5C5A397),
+	SPH_C64(0xB13525942525104A), SPH_C64(0x20F259795959ABB2),
+	SPH_C64(0xAE54842A8484D015), SPH_C64(0xA7B772D57272C5E4),
+	SPH_C64(0xDDD539E43939EC72), SPH_C64(0x615A4C2D4C4C1698),
+	SPH_C64(0x3BCA5E655E5E94BC), SPH_C64(0x85E778FD78789FF0),
+	SPH_C64(0xD8DD38E03838E570), SPH_C64(0x86148C0A8C8C9805),
+	SPH_C64(0xB2C6D163D1D117BF), SPH_C64(0x0B41A5AEA5A5E457),
+	SPH_C64(0x4D43E2AFE2E2A1D9), SPH_C64(0xF82F619961614EC2),
+	SPH_C64(0x45F1B3F6B3B3427B), SPH_C64(0xA515218421213442),
+	SPH_C64(0xD6949C4A9C9C0825), SPH_C64(0x66F01E781E1EEE3C),
+	SPH_C64(0x5222431143436186), SPH_C64(0xFC76C73BC7C7B193),
+	SPH_C64(0x2BB3FCD7FCFC4FE5), SPH_C64(0x1420041004042408),
+	SPH_C64(0x08B251595151E3A2), SPH_C64(0xC7BC995E9999252F),
+	SPH_C64(0xC44F6DA96D6D22DA), SPH_C64(0x39680D340D0D651A),
+	SPH_C64(0x3583FACFFAFA79E9), SPH_C64(0x84B6DF5BDFDF69A3),
+	SPH_C64(0x9BD77EE57E7EA9FC), SPH_C64(0xB43D249024241948),
+	SPH_C64(0xD7C53BEC3B3BFE76), SPH_C64(0x3D31AB96ABAB9A4B),
+	SPH_C64(0xD13ECE1FCECEF081), SPH_C64(0x5588114411119922),
+	SPH_C64(0x890C8F068F8F8303), SPH_C64(0x6B4A4E254E4E049C),
+	SPH_C64(0x51D1B7E6B7B76673), SPH_C64(0x600BEB8BEBEBE0CB),
+	SPH_C64(0xCCFD3CF03C3CC178), SPH_C64(0xBF7C813E8181FD1F),
+	SPH_C64(0xFED4946A94944035), SPH_C64(0x0CEBF7FBF7F71CF3),
+	SPH_C64(0x67A1B9DEB9B9186F), SPH_C64(0x5F98134C13138B26),
+	SPH_C64(0x9C7D2CB02C2C5158), SPH_C64(0xB8D6D36BD3D305BB),
+	SPH_C64(0x5C6BE7BBE7E78CD3), SPH_C64(0xCB576EA56E6E39DC),
+	SPH_C64(0xF36EC437C4C4AA95), SPH_C64(0x0F18030C03031B06),
+	SPH_C64(0x138A56455656DCAC), SPH_C64(0x491A440D44445E88),
+	SPH_C64(0x9EDF7FE17F7FA0FE), SPH_C64(0x3721A99EA9A9884F),
+	SPH_C64(0x824D2AA82A2A6754), SPH_C64(0x6DB1BBD6BBBB0A6B),
+	SPH_C64(0xE246C123C1C1879F), SPH_C64(0x02A253515353F1A6),
+	SPH_C64(0x8BAEDC57DCDC72A5), SPH_C64(0x27580B2C0B0B5316),
+	SPH_C64(0xD39C9D4E9D9D0127), SPH_C64(0xC1476CAD6C6C2BD8),
+	SPH_C64(0xF59531C43131A462), SPH_C64(0xB98774CD7474F3E8),
+	SPH_C64(0x09E3F6FFF6F615F1), SPH_C64(0x430A460546464C8C),
+	SPH_C64(0x2609AC8AACACA545), SPH_C64(0x973C891E8989B50F),
+	SPH_C64(0x44A014501414B428), SPH_C64(0x425BE1A3E1E1BADF),
+	SPH_C64(0x4EB016581616A62C), SPH_C64(0xD2CD3AE83A3AF774),
+	SPH_C64(0xD06F69B9696906D2), SPH_C64(0x2D48092409094112),
+	SPH_C64(0xADA770DD7070D7E0), SPH_C64(0x54D9B6E2B6B66F71),
+	SPH_C64(0xB7CED067D0D01EBD), SPH_C64(0x7E3BED93EDEDD6C7),
+	SPH_C64(0xDB2ECC17CCCCE285), SPH_C64(0x572A421542426884),
+	SPH_C64(0xC2B4985A98982C2D), SPH_C64(0x0E49A4AAA4A4ED55),
+	SPH_C64(0x885D28A028287550), SPH_C64(0x31DA5C6D5C5C86B8),
+	SPH_C64(0x3F93F8C7F8F86BED), SPH_C64(0xA44486228686C211)
+};
+
+static const sph_u64 plain_T3[256] = {
+	SPH_C64(0xC018601818D83078), SPH_C64(0x05238C23232646AF),
+	SPH_C64(0x7EC63FC6C6B891F9), SPH_C64(0x13E887E8E8FBCD6F),
+	SPH_C64(0x4C87268787CB13A1), SPH_C64(0xA9B8DAB8B8116D62),
+	SPH_C64(0x0801040101090205), SPH_C64(0x424F214F4F0D9E6E),
+	SPH_C64(0xAD36D836369B6CEE), SPH_C64(0x59A6A2A6A6FF5104),
+	SPH_C64(0xDED26FD2D20CB9BD), SPH_C64(0xFBF5F3F5F50EF706),
+	SPH_C64(0xEF79F9797996F280), SPH_C64(0x5F6FA16F6F30DECE),
+	SPH_C64(0xFC917E91916D3FEF), SPH_C64(0xAA52555252F8A407),
+	SPH_C64(0x27609D606047C0FD), SPH_C64(0x89BCCABCBC356576),
+	SPH_C64(0xAC9B569B9B372BCD), SPH_C64(0x048E028E8E8A018C),
+	SPH_C64(0x71A3B6A3A3D25B15), SPH_C64(0x600C300C0C6C183C),
+	SPH_C64(0xFF7BF17B7B84F68A), SPH_C64(0xB535D43535806AE1),
+	SPH_C64(0xE81D741D1DF53A69), SPH_C64(0x53E0A7E0E0B3DD47),
+	SPH_C64(0xF6D77BD7D721B3AC), SPH_C64(0x5EC22FC2C29C99ED),
+	SPH_C64(0x6D2EB82E2E435C96), SPH_C64(0x624B314B4B29967A),
+	SPH_C64(0xA3FEDFFEFE5DE121), SPH_C64(0x8257415757D5AE16),
+	SPH_C64(0xA815541515BD2A41), SPH_C64(0x9F77C17777E8EEB6),
+	SPH_C64(0xA537DC3737926EEB), SPH_C64(0x7BE5B3E5E59ED756),
+	SPH_C64(0x8C9F469F9F1323D9), SPH_C64(0xD3F0E7F0F023FD17),
+	SPH_C64(0x6A4A354A4A20947F), SPH_C64(0x9EDA4FDADA44A995),
+	SPH_C64(0xFA587D5858A2B025), SPH_C64(0x06C903C9C9CF8FCA),
+	SPH_C64(0x5529A429297C528D), SPH_C64(0x500A280A0A5A1422),
+	SPH_C64(0xE1B1FEB1B1507F4F), SPH_C64(0x69A0BAA0A0C95D1A),
+	SPH_C64(0x7F6BB16B6B14D6DA), SPH_C64(0x5C852E8585D917AB),
+	SPH_C64(0x81BDCEBDBD3C6773), SPH_C64(0xD25D695D5D8FBA34),
+	SPH_C64(0x8010401010902050), SPH_C64(0xF3F4F7F4F407F503),
+	SPH_C64(0x16CB0BCBCBDD8BC0), SPH_C64(0xED3EF83E3ED37CC6),
+	SPH_C64(0x28051405052D0A11), SPH_C64(0x1F6781676778CEE6),
+	SPH_C64(0x73E4B7E4E497D553), SPH_C64(0x25279C2727024EBB),
+	SPH_C64(0x3241194141738258), SPH_C64(0x2C8B168B8BA70B9D),
+	SPH_C64(0x51A7A6A7A7F65301), SPH_C64(0xCF7DE97D7DB2FA94),
+	SPH_C64(0xDC956E95954937FB), SPH_C64(0x8ED847D8D856AD9F),
+	SPH_C64(0x8BFBCBFBFB70EB30), SPH_C64(0x23EE9FEEEECDC171),
+	SPH_C64(0xC77CED7C7CBBF891), SPH_C64(0x176685666671CCE3),
+	SPH_C64(0xA6DD53DDDD7BA78E), SPH_C64(0xB8175C1717AF2E4B),
+	SPH_C64(0x0247014747458E46), SPH_C64(0x849E429E9E1A21DC),
+	SPH_C64(0x1ECA0FCACAD489C5), SPH_C64(0x752DB42D2D585A99),
+	SPH_C64(0x91BFC6BFBF2E6379), SPH_C64(0x38071C07073F0E1B),
+	SPH_C64(0x01AD8EADADAC4723), SPH_C64(0xEA5A755A5AB0B42F),
+	SPH_C64(0x6C83368383EF1BB5), SPH_C64(0x8533CC3333B666FF),
+	SPH_C64(0x3F639163635CC6F2), SPH_C64(0x100208020212040A),
+	SPH_C64(0x39AA92AAAA934938), SPH_C64(0xAF71D97171DEE2A8),
+	SPH_C64(0x0EC807C8C8C68DCF), SPH_C64(0xC819641919D1327D),
+	SPH_C64(0x72493949493B9270), SPH_C64(0x86D943D9D95FAF9A),
+	SPH_C64(0xC3F2EFF2F231F91D), SPH_C64(0x4BE3ABE3E3A8DB48),
+	SPH_C64(0xE25B715B5BB9B62A), SPH_C64(0x34881A8888BC0D92),
+	SPH_C64(0xA49A529A9A3E29C8), SPH_C64(0x2D269826260B4CBE),
+	SPH_C64(0x8D32C83232BF64FA), SPH_C64(0xE9B0FAB0B0597D4A),
+	SPH_C64(0x1BE983E9E9F2CF6A), SPH_C64(0x780F3C0F0F771E33),
+	SPH_C64(0xE6D573D5D533B7A6), SPH_C64(0x74803A8080F41DBA),
+	SPH_C64(0x99BEC2BEBE27617C), SPH_C64(0x26CD13CDCDEB87DE),
+	SPH_C64(0xBD34D034348968E4), SPH_C64(0x7A483D4848329075),
+	SPH_C64(0xABFFDBFFFF54E324), SPH_C64(0xF77AF57A7A8DF48F),
+	SPH_C64(0xF4907A9090643DEA), SPH_C64(0xC25F615F5F9DBE3E),
+	SPH_C64(0x1D208020203D40A0), SPH_C64(0x6768BD68680FD0D5),
+	SPH_C64(0xD01A681A1ACA3472), SPH_C64(0x19AE82AEAEB7412C),
+	SPH_C64(0xC9B4EAB4B47D755E), SPH_C64(0x9A544D5454CEA819),
+	SPH_C64(0xEC937693937F3BE5), SPH_C64(0x0D228822222F44AA),
+	SPH_C64(0x07648D646463C8E9), SPH_C64(0xDBF1E3F1F12AFF12),
+	SPH_C64(0xBF73D17373CCE6A2), SPH_C64(0x901248121282245A),
+	SPH_C64(0x3A401D40407A805D), SPH_C64(0x4008200808481028),
+	SPH_C64(0x56C32BC3C3959BE8), SPH_C64(0x33EC97ECECDFC57B),
+	SPH_C64(0x96DB4BDBDB4DAB90), SPH_C64(0x61A1BEA1A1C05F1F),
+	SPH_C64(0x1C8D0E8D8D910783), SPH_C64(0xF53DF43D3DC87AC9),
+	SPH_C64(0xCC976697975B33F1), SPH_C64(0x0000000000000000),
+	SPH_C64(0x36CF1BCFCFF983D4), SPH_C64(0x452BAC2B2B6E5687),
+	SPH_C64(0x9776C57676E1ECB3), SPH_C64(0x6482328282E619B0),
+	SPH_C64(0xFED67FD6D628B1A9), SPH_C64(0xD81B6C1B1BC33677),
+	SPH_C64(0xC1B5EEB5B574775B), SPH_C64(0x11AF86AFAFBE4329),
+	SPH_C64(0x776AB56A6A1DD4DF), SPH_C64(0xBA505D5050EAA00D),
+	SPH_C64(0x1245094545578A4C), SPH_C64(0xCBF3EBF3F338FB18),
+	SPH_C64(0x9D30C03030AD60F0), SPH_C64(0x2BEF9BEFEFC4C374),
+	SPH_C64(0xE53FFC3F3FDA7EC3), SPH_C64(0x9255495555C7AA1C),
+	SPH_C64(0x79A2B2A2A2DB5910), SPH_C64(0x03EA8FEAEAE9C965),
+	SPH_C64(0x0F658965656ACAEC), SPH_C64(0xB9BAD2BABA036968),
+	SPH_C64(0x652FBC2F2F4A5E93), SPH_C64(0x4EC027C0C08E9DE7),
+	SPH_C64(0xBEDE5FDEDE60A181), SPH_C64(0xE01C701C1CFC386C),
+	SPH_C64(0xBBFDD3FDFD46E72E), SPH_C64(0x524D294D4D1F9A64),
+	SPH_C64(0xE4927292927639E0), SPH_C64(0x8F75C97575FAEABC),
+	SPH_C64(0x3006180606360C1E), SPH_C64(0x248A128A8AAE0998),
+	SPH_C64(0xF9B2F2B2B24B7940), SPH_C64(0x63E6BFE6E685D159),
+	SPH_C64(0x700E380E0E7E1C36), SPH_C64(0xF81F7C1F1FE73E63),
+	SPH_C64(0x376295626255C4F7), SPH_C64(0xEED477D4D43AB5A3),
+	SPH_C64(0x29A89AA8A8814D32), SPH_C64(0xC4966296965231F4),
+	SPH_C64(0x9BF9C3F9F962EF3A), SPH_C64(0x66C533C5C5A397F6),
+	SPH_C64(0x3525942525104AB1), SPH_C64(0xF259795959ABB220),
+	SPH_C64(0x54842A8484D015AE), SPH_C64(0xB772D57272C5E4A7),
+	SPH_C64(0xD539E43939EC72DD), SPH_C64(0x5A4C2D4C4C169861),
+	SPH_C64(0xCA5E655E5E94BC3B), SPH_C64(0xE778FD78789FF085),
+	SPH_C64(0xDD38E03838E570D8), SPH_C64(0x148C0A8C8C980586),
+	SPH_C64(0xC6D163D1D117BFB2), SPH_C64(0x41A5AEA5A5E4570B),
+	SPH_C64(0x43E2AFE2E2A1D94D), SPH_C64(0x2F619961614EC2F8),
+	SPH_C64(0xF1B3F6B3B3427B45), SPH_C64(0x15218421213442A5),
+	SPH_C64(0x949C4A9C9C0825D6), SPH_C64(0xF01E781E1EEE3C66),
+	SPH_C64(0x2243114343618652), SPH_C64(0x76C73BC7C7B193FC),
+	SPH_C64(0xB3FCD7FCFC4FE52B), SPH_C64(0x2004100404240814),
+	SPH_C64(0xB251595151E3A208), SPH_C64(0xBC995E9999252FC7),
+	SPH_C64(0x4F6DA96D6D22DAC4), SPH_C64(0x680D340D0D651A39),
+	SPH_C64(0x83FACFFAFA79E935), SPH_C64(0xB6DF5BDFDF69A384),
+	SPH_C64(0xD77EE57E7EA9FC9B), SPH_C64(0x3D249024241948B4),
+	SPH_C64(0xC53BEC3B3BFE76D7), SPH_C64(0x31AB96ABAB9A4B3D),
+	SPH_C64(0x3ECE1FCECEF081D1), SPH_C64(0x8811441111992255),
+	SPH_C64(0x0C8F068F8F830389), SPH_C64(0x4A4E254E4E049C6B),
+	SPH_C64(0xD1B7E6B7B7667351), SPH_C64(0x0BEB8BEBEBE0CB60),
+	SPH_C64(0xFD3CF03C3CC178CC), SPH_C64(0x7C813E8181FD1FBF),
+	SPH_C64(0xD4946A94944035FE), SPH_C64(0xEBF7FBF7F71CF30C),
+	SPH_C64(0xA1B9DEB9B9186F67), SPH_C64(0x98134C13138B265F),
+	SPH_C64(0x7D2CB02C2C51589C), SPH_C64(0xD6D36BD3D305BBB8),
+	SPH_C64(0x6BE7BBE7E78CD35C), SPH_C64(0x576EA56E6E39DCCB),
+	SPH_C64(0x6EC437C4C4AA95F3), SPH_C64(0x18030C03031B060F),
+	SPH_C64(0x8A56455656DCAC13), SPH_C64(0x1A440D44445E8849),
+	SPH_C64(0xDF7FE17F7FA0FE9E), SPH_C64(0x21A99EA9A9884F37),
+	SPH_C64(0x4D2AA82A2A675482), SPH_C64(0xB1BBD6BBBB0A6B6D),
+	SPH_C64(0x46C123C1C1879FE2), SPH_C64(0xA253515353F1A602),
+	SPH_C64(0xAEDC57DCDC72A58B), SPH_C64(0x580B2C0B0B531627),
+	SPH_C64(0x9C9D4E9D9D0127D3), SPH_C64(0x476CAD6C6C2BD8C1),
+	SPH_C64(0x9531C43131A462F5), SPH_C64(0x8774CD7474F3E8B9),
+	SPH_C64(0xE3F6FFF6F615F109), SPH_C64(0x0A460546464C8C43),
+	SPH_C64(0x09AC8AACACA54526), SPH_C64(0x3C891E8989B50F97),
+	SPH_C64(0xA014501414B42844), SPH_C64(0x5BE1A3E1E1BADF42),
+	SPH_C64(0xB016581616A62C4E), SPH_C64(0xCD3AE83A3AF774D2),
+	SPH_C64(0x6F69B9696906D2D0), SPH_C64(0x480924090941122D),
+	SPH_C64(0xA770DD7070D7E0AD), SPH_C64(0xD9B6E2B6B66F7154),
+	SPH_C64(0xCED067D0D01EBDB7), SPH_C64(0x3BED93EDEDD6C77E),
+	SPH_C64(0x2ECC17CCCCE285DB), SPH_C64(0x2A42154242688457),
+	SPH_C64(0xB4985A98982C2DC2), SPH_C64(0x49A4AAA4A4ED550E),
+	SPH_C64(0x5D28A02828755088), SPH_C64(0xDA5C6D5C5C86B831),
+	SPH_C64(0x93F8C7F8F86BED3F), SPH_C64(0x4486228686C211A4)
+};
+
+static const sph_u64 plain_T4[256] = {
+	SPH_C64(0x18601818D83078C0), SPH_C64(0x238C23232646AF05),
+	SPH_C64(0xC63FC6C6B891F97E), SPH_C64(0xE887E8E8FBCD6F13),
+	SPH_C64(0x87268787CB13A14C), SPH_C64(0xB8DAB8B8116D62A9),
+	SPH_C64(0x0104010109020508), SPH_C64(0x4F214F4F0D9E6E42),
+	SPH_C64(0x36D836369B6CEEAD), SPH_C64(0xA6A2A6A6FF510459),
+	SPH_C64(0xD26FD2D20CB9BDDE), SPH_C64(0xF5F3F5F50EF706FB),
+	SPH_C64(0x79F9797996F280EF), SPH_C64(0x6FA16F6F30DECE5F),
+	SPH_C64(0x917E91916D3FEFFC), SPH_C64(0x52555252F8A407AA),
+	SPH_C64(0x609D606047C0FD27), SPH_C64(0xBCCABCBC35657689),
+	SPH_C64(0x9B569B9B372BCDAC), SPH_C64(0x8E028E8E8A018C04),
+	SPH_C64(0xA3B6A3A3D25B1571), SPH_C64(0x0C300C0C6C183C60),
+	SPH_C64(0x7BF17B7B84F68AFF), SPH_C64(0x35D43535806AE1B5),
+	SPH_C64(0x1D741D1DF53A69E8), SPH_C64(0xE0A7E0E0B3DD4753),
+	SPH_C64(0xD77BD7D721B3ACF6), SPH_C64(0xC22FC2C29C99ED5E),
+	SPH_C64(0x2EB82E2E435C966D), SPH_C64(0x4B314B4B29967A62),
+	SPH_C64(0xFEDFFEFE5DE121A3), SPH_C64(0x57415757D5AE1682),
+	SPH_C64(0x15541515BD2A41A8), SPH_C64(0x77C17777E8EEB69F),
+	SPH_C64(0x37DC3737926EEBA5), SPH_C64(0xE5B3E5E59ED7567B),
+	SPH_C64(0x9F469F9F1323D98C), SPH_C64(0xF0E7F0F023FD17D3),
+	SPH_C64(0x4A354A4A20947F6A), SPH_C64(0xDA4FDADA44A9959E),
+	SPH_C64(0x587D5858A2B025FA), SPH_C64(0xC903C9C9CF8FCA06),
+	SPH_C64(0x29A429297C528D55), SPH_C64(0x0A280A0A5A142250),
+	SPH_C64(0xB1FEB1B1507F4FE1), SPH_C64(0xA0BAA0A0C95D1A69),
+	SPH_C64(0x6BB16B6B14D6DA7F), SPH_C64(0x852E8585D917AB5C),
+	SPH_C64(0xBDCEBDBD3C677381), SPH_C64(0x5D695D5D8FBA34D2),
+	SPH_C64(0x1040101090205080), SPH_C64(0xF4F7F4F407F503F3),
+	SPH_C64(0xCB0BCBCBDD8BC016), SPH_C64(0x3EF83E3ED37CC6ED),
+	SPH_C64(0x051405052D0A1128), SPH_C64(0x6781676778CEE61F),
+	SPH_C64(0xE4B7E4E497D55373), SPH_C64(0x279C2727024EBB25),
+	SPH_C64(0x4119414173825832), SPH_C64(0x8B168B8BA70B9D2C),
+	SPH_C64(0xA7A6A7A7F6530151), SPH_C64(0x7DE97D7DB2FA94CF),
+	SPH_C64(0x956E95954937FBDC), SPH_C64(0xD847D8D856AD9F8E),
+	SPH_C64(0xFBCBFBFB70EB308B), SPH_C64(0xEE9FEEEECDC17123),
+	SPH_C64(0x7CED7C7CBBF891C7), SPH_C64(0x6685666671CCE317),
+	SPH_C64(0xDD53DDDD7BA78EA6), SPH_C64(0x175C1717AF2E4BB8),
+	SPH_C64(0x47014747458E4602), SPH_C64(0x9E429E9E1A21DC84),
+	SPH_C64(0xCA0FCACAD489C51E), SPH_C64(0x2DB42D2D585A9975),
+	SPH_C64(0xBFC6BFBF2E637991), SPH_C64(0x071C07073F0E1B38),
+	SPH_C64(0xAD8EADADAC472301), SPH_C64(0x5A755A5AB0B42FEA),
+	SPH_C64(0x83368383EF1BB56C), SPH_C64(0x33CC3333B666FF85),
+	SPH_C64(0x639163635CC6F23F), SPH_C64(0x0208020212040A10),
+	SPH_C64(0xAA92AAAA93493839), SPH_C64(0x71D97171DEE2A8AF),
+	SPH_C64(0xC807C8C8C68DCF0E), SPH_C64(0x19641919D1327DC8),
+	SPH_C64(0x493949493B927072), SPH_C64(0xD943D9D95FAF9A86),
+	SPH_C64(0xF2EFF2F231F91DC3), SPH_C64(0xE3ABE3E3A8DB484B),
+	SPH_C64(0x5B715B5BB9B62AE2), SPH_C64(0x881A8888BC0D9234),
+	SPH_C64(0x9A529A9A3E29C8A4), SPH_C64(0x269826260B4CBE2D),
+	SPH_C64(0x32C83232BF64FA8D), SPH_C64(0xB0FAB0B0597D4AE9),
+	SPH_C64(0xE983E9E9F2CF6A1B), SPH_C64(0x0F3C0F0F771E3378),
+	SPH_C64(0xD573D5D533B7A6E6), SPH_C64(0x803A8080F41DBA74),
+	SPH_C64(0xBEC2BEBE27617C99), SPH_C64(0xCD13CDCDEB87DE26),
+	SPH_C64(0x34D034348968E4BD), SPH_C64(0x483D48483290757A),
+	SPH_C64(0xFFDBFFFF54E324AB), SPH_C64(0x7AF57A7A8DF48FF7),
+	SPH_C64(0x907A9090643DEAF4), SPH_C64(0x5F615F5F9DBE3EC2),
+	SPH_C64(0x208020203D40A01D), SPH_C64(0x68BD68680FD0D567),
+	SPH_C64(0x1A681A1ACA3472D0), SPH_C64(0xAE82AEAEB7412C19),
+	SPH_C64(0xB4EAB4B47D755EC9), SPH_C64(0x544D5454CEA8199A),
+	SPH_C64(0x937693937F3BE5EC), SPH_C64(0x228822222F44AA0D),
+	SPH_C64(0x648D646463C8E907), SPH_C64(0xF1E3F1F12AFF12DB),
+	SPH_C64(0x73D17373CCE6A2BF), SPH_C64(0x1248121282245A90),
+	SPH_C64(0x401D40407A805D3A), SPH_C64(0x0820080848102840),
+	SPH_C64(0xC32BC3C3959BE856), SPH_C64(0xEC97ECECDFC57B33),
+	SPH_C64(0xDB4BDBDB4DAB9096), SPH_C64(0xA1BEA1A1C05F1F61),
+	SPH_C64(0x8D0E8D8D9107831C), SPH_C64(0x3DF43D3DC87AC9F5),
+	SPH_C64(0x976697975B33F1CC), SPH_C64(0x0000000000000000),
+	SPH_C64(0xCF1BCFCFF983D436), SPH_C64(0x2BAC2B2B6E568745),
+	SPH_C64(0x76C57676E1ECB397), SPH_C64(0x82328282E619B064),
+	SPH_C64(0xD67FD6D628B1A9FE), SPH_C64(0x1B6C1B1BC33677D8),
+	SPH_C64(0xB5EEB5B574775BC1), SPH_C64(0xAF86AFAFBE432911),
+	SPH_C64(0x6AB56A6A1DD4DF77), SPH_C64(0x505D5050EAA00DBA),
+	SPH_C64(0x45094545578A4C12), SPH_C64(0xF3EBF3F338FB18CB),
+	SPH_C64(0x30C03030AD60F09D), SPH_C64(0xEF9BEFEFC4C3742B),
+	SPH_C64(0x3FFC3F3FDA7EC3E5), SPH_C64(0x55495555C7AA1C92),
+	SPH_C64(0xA2B2A2A2DB591079), SPH_C64(0xEA8FEAEAE9C96503),
+	SPH_C64(0x658965656ACAEC0F), SPH_C64(0xBAD2BABA036968B9),
+	SPH_C64(0x2FBC2F2F4A5E9365), SPH_C64(0xC027C0C08E9DE74E),
+	SPH_C64(0xDE5FDEDE60A181BE), SPH_C64(0x1C701C1CFC386CE0),
+	SPH_C64(0xFDD3FDFD46E72EBB), SPH_C64(0x4D294D4D1F9A6452),
+	SPH_C64(0x927292927639E0E4), SPH_C64(0x75C97575FAEABC8F),
+	SPH_C64(0x06180606360C1E30), SPH_C64(0x8A128A8AAE099824),
+	SPH_C64(0xB2F2B2B24B7940F9), SPH_C64(0xE6BFE6E685D15963),
+	SPH_C64(0x0E380E0E7E1C3670), SPH_C64(0x1F7C1F1FE73E63F8),
+	SPH_C64(0x6295626255C4F737), SPH_C64(0xD477D4D43AB5A3EE),
+	SPH_C64(0xA89AA8A8814D3229), SPH_C64(0x966296965231F4C4),
+	SPH_C64(0xF9C3F9F962EF3A9B), SPH_C64(0xC533C5C5A397F666),
+	SPH_C64(0x25942525104AB135), SPH_C64(0x59795959ABB220F2),
+	SPH_C64(0x842A8484D015AE54), SPH_C64(0x72D57272C5E4A7B7),
+	SPH_C64(0x39E43939EC72DDD5), SPH_C64(0x4C2D4C4C1698615A),
+	SPH_C64(0x5E655E5E94BC3BCA), SPH_C64(0x78FD78789FF085E7),
+	SPH_C64(0x38E03838E570D8DD), SPH_C64(0x8C0A8C8C98058614),
+	SPH_C64(0xD163D1D117BFB2C6), SPH_C64(0xA5AEA5A5E4570B41),
+	SPH_C64(0xE2AFE2E2A1D94D43), SPH_C64(0x619961614EC2F82F),
+	SPH_C64(0xB3F6B3B3427B45F1), SPH_C64(0x218421213442A515),
+	SPH_C64(0x9C4A9C9C0825D694), SPH_C64(0x1E781E1EEE3C66F0),
+	SPH_C64(0x4311434361865222), SPH_C64(0xC73BC7C7B193FC76),
+	SPH_C64(0xFCD7FCFC4FE52BB3), SPH_C64(0x0410040424081420),
+	SPH_C64(0x51595151E3A208B2), SPH_C64(0x995E9999252FC7BC),
+	SPH_C64(0x6DA96D6D22DAC44F), SPH_C64(0x0D340D0D651A3968),
+	SPH_C64(0xFACFFAFA79E93583), SPH_C64(0xDF5BDFDF69A384B6),
+	SPH_C64(0x7EE57E7EA9FC9BD7), SPH_C64(0x249024241948B43D),
+	SPH_C64(0x3BEC3B3BFE76D7C5), SPH_C64(0xAB96ABAB9A4B3D31),
+	SPH_C64(0xCE1FCECEF081D13E), SPH_C64(0x1144111199225588),
+	SPH_C64(0x8F068F8F8303890C), SPH_C64(0x4E254E4E049C6B4A),
+	SPH_C64(0xB7E6B7B7667351D1), SPH_C64(0xEB8BEBEBE0CB600B),
+	SPH_C64(0x3CF03C3CC178CCFD), SPH_C64(0x813E8181FD1FBF7C),
+	SPH_C64(0x946A94944035FED4), SPH_C64(0xF7FBF7F71CF30CEB),
+	SPH_C64(0xB9DEB9B9186F67A1), SPH_C64(0x134C13138B265F98),
+	SPH_C64(0x2CB02C2C51589C7D), SPH_C64(0xD36BD3D305BBB8D6),
+	SPH_C64(0xE7BBE7E78CD35C6B), SPH_C64(0x6EA56E6E39DCCB57),
+	SPH_C64(0xC437C4C4AA95F36E), SPH_C64(0x030C03031B060F18),
+	SPH_C64(0x56455656DCAC138A), SPH_C64(0x440D44445E88491A),
+	SPH_C64(0x7FE17F7FA0FE9EDF), SPH_C64(0xA99EA9A9884F3721),
+	SPH_C64(0x2AA82A2A6754824D), SPH_C64(0xBBD6BBBB0A6B6DB1),
+	SPH_C64(0xC123C1C1879FE246), SPH_C64(0x53515353F1A602A2),
+	SPH_C64(0xDC57DCDC72A58BAE), SPH_C64(0x0B2C0B0B53162758),
+	SPH_C64(0x9D4E9D9D0127D39C), SPH_C64(0x6CAD6C6C2BD8C147),
+	SPH_C64(0x31C43131A462F595), SPH_C64(0x74CD7474F3E8B987),
+	SPH_C64(0xF6FFF6F615F109E3), SPH_C64(0x460546464C8C430A),
+	SPH_C64(0xAC8AACACA5452609), SPH_C64(0x891E8989B50F973C),
+	SPH_C64(0x14501414B42844A0), SPH_C64(0xE1A3E1E1BADF425B),
+	SPH_C64(0x16581616A62C4EB0), SPH_C64(0x3AE83A3AF774D2CD),
+	SPH_C64(0x69B9696906D2D06F), SPH_C64(0x0924090941122D48),
+	SPH_C64(0x70DD7070D7E0ADA7), SPH_C64(0xB6E2B6B66F7154D9),
+	SPH_C64(0xD067D0D01EBDB7CE), SPH_C64(0xED93EDEDD6C77E3B),
+	SPH_C64(0xCC17CCCCE285DB2E), SPH_C64(0x421542426884572A),
+	SPH_C64(0x985A98982C2DC2B4), SPH_C64(0xA4AAA4A4ED550E49),
+	SPH_C64(0x28A028287550885D), SPH_C64(0x5C6D5C5C86B831DA),
+	SPH_C64(0xF8C7F8F86BED3F93), SPH_C64(0x86228686C211A444)
+};
+
+static const sph_u64 plain_T5[256] = {
+	SPH_C64(0x601818D83078C018), SPH_C64(0x8C23232646AF0523),
+	SPH_C64(0x3FC6C6B891F97EC6), SPH_C64(0x87E8E8FBCD6F13E8),
+	SPH_C64(0x268787CB13A14C87), SPH_C64(0xDAB8B8116D62A9B8),
+	SPH_C64(0x0401010902050801), SPH_C64(0x214F4F0D9E6E424F),
+	SPH_C64(0xD836369B6CEEAD36), SPH_C64(0xA2A6A6FF510459A6),
+	SPH_C64(0x6FD2D20CB9BDDED2), SPH_C64(0xF3F5F50EF706FBF5),
+	SPH_C64(0xF9797996F280EF79), SPH_C64(0xA16F6F30DECE5F6F),
+	SPH_C64(0x7E91916D3FEFFC91), SPH_C64(0x555252F8A407AA52),
+	SPH_C64(0x9D606047C0FD2760), SPH_C64(0xCABCBC35657689BC),
+	SPH_C64(0x569B9B372BCDAC9B), SPH_C64(0x028E8E8A018C048E),
+	SPH_C64(0xB6A3A3D25B1571A3), SPH_C64(0x300C0C6C183C600C),
+	SPH_C64(0xF17B7B84F68AFF7B), SPH_C64(0xD43535806AE1B535),
+	SPH_C64(0x741D1DF53A69E81D), SPH_C64(0xA7E0E0B3DD4753E0),
+	SPH_C64(0x7BD7D721B3ACF6D7), SPH_C64(0x2FC2C29C99ED5EC2),
+	SPH_C64(0xB82E2E435C966D2E), SPH_C64(0x314B4B29967A624B),
+	SPH_C64(0xDFFEFE5DE121A3FE), SPH_C64(0x415757D5AE168257),
+	SPH_C64(0x541515BD2A41A815), SPH_C64(0xC17777E8EEB69F77),
+	SPH_C64(0xDC3737926EEBA537), SPH_C64(0xB3E5E59ED7567BE5),
+	SPH_C64(0x469F9F1323D98C9F), SPH_C64(0xE7F0F023FD17D3F0),
+	SPH_C64(0x354A4A20947F6A4A), SPH_C64(0x4FDADA44A9959EDA),
+	SPH_C64(0x7D5858A2B025FA58), SPH_C64(0x03C9C9CF8FCA06C9),
+	SPH_C64(0xA429297C528D5529), SPH_C64(0x280A0A5A1422500A),
+	SPH_C64(0xFEB1B1507F4FE1B1), SPH_C64(0xBAA0A0C95D1A69A0),
+	SPH_C64(0xB16B6B14D6DA7F6B), SPH_C64(0x2E8585D917AB5C85),
+	SPH_C64(0xCEBDBD3C677381BD), SPH_C64(0x695D5D8FBA34D25D),
+	SPH_C64(0x4010109020508010), SPH_C64(0xF7F4F407F503F3F4),
+	SPH_C64(0x0BCBCBDD8BC016CB), SPH_C64(0xF83E3ED37CC6ED3E),
+	SPH_C64(0x1405052D0A112805), SPH_C64(0x81676778CEE61F67),
+	SPH_C64(0xB7E4E497D55373E4), SPH_C64(0x9C2727024EBB2527),
+	SPH_C64(0x1941417382583241), SPH_C64(0x168B8BA70B9D2C8B),
+	SPH_C64(0xA6A7A7F6530151A7), SPH_C64(0xE97D7DB2FA94CF7D),
+	SPH_C64(0x6E95954937FBDC95), SPH_C64(0x47D8D856AD9F8ED8),
+	SPH_C64(0xCBFBFB70EB308BFB), SPH_C64(0x9FEEEECDC17123EE),
+	SPH_C64(0xED7C7CBBF891C77C), SPH_C64(0x85666671CCE31766),
+	SPH_C64(0x53DDDD7BA78EA6DD), SPH_C64(0x5C1717AF2E4BB817),
+	SPH_C64(0x014747458E460247), SPH_C64(0x429E9E1A21DC849E),
+	SPH_C64(0x0FCACAD489C51ECA), SPH_C64(0xB42D2D585A99752D),
+	SPH_C64(0xC6BFBF2E637991BF), SPH_C64(0x1C07073F0E1B3807),
+	SPH_C64(0x8EADADAC472301AD), SPH_C64(0x755A5AB0B42FEA5A),
+	SPH_C64(0x368383EF1BB56C83), SPH_C64(0xCC3333B666FF8533),
+	SPH_C64(0x9163635CC6F23F63), SPH_C64(0x08020212040A1002),
+	SPH_C64(0x92AAAA93493839AA), SPH_C64(0xD97171DEE2A8AF71),
+	SPH_C64(0x07C8C8C68DCF0EC8), SPH_C64(0x641919D1327DC819),
+	SPH_C64(0x3949493B92707249), SPH_C64(0x43D9D95FAF9A86D9),
+	SPH_C64(0xEFF2F231F91DC3F2), SPH_C64(0xABE3E3A8DB484BE3),
+	SPH_C64(0x715B5BB9B62AE25B), SPH_C64(0x1A8888BC0D923488),
+	SPH_C64(0x529A9A3E29C8A49A), SPH_C64(0x9826260B4CBE2D26),
+	SPH_C64(0xC83232BF64FA8D32), SPH_C64(0xFAB0B0597D4AE9B0),
+	SPH_C64(0x83E9E9F2CF6A1BE9), SPH_C64(0x3C0F0F771E33780F),
+	SPH_C64(0x73D5D533B7A6E6D5), SPH_C64(0x3A8080F41DBA7480),
+	SPH_C64(0xC2BEBE27617C99BE), SPH_C64(0x13CDCDEB87DE26CD),
+	SPH_C64(0xD034348968E4BD34), SPH_C64(0x3D48483290757A48),
+	SPH_C64(0xDBFFFF54E324ABFF), SPH_C64(0xF57A7A8DF48FF77A),
+	SPH_C64(0x7A9090643DEAF490), SPH_C64(0x615F5F9DBE3EC25F),
+	SPH_C64(0x8020203D40A01D20), SPH_C64(0xBD68680FD0D56768),
+	SPH_C64(0x681A1ACA3472D01A), SPH_C64(0x82AEAEB7412C19AE),
+	SPH_C64(0xEAB4B47D755EC9B4), SPH_C64(0x4D5454CEA8199A54),
+	SPH_C64(0x7693937F3BE5EC93), SPH_C64(0x8822222F44AA0D22),
+	SPH_C64(0x8D646463C8E90764), SPH_C64(0xE3F1F12AFF12DBF1),
+	SPH_C64(0xD17373CCE6A2BF73), SPH_C64(0x48121282245A9012),
+	SPH_C64(0x1D40407A805D3A40), SPH_C64(0x2008084810284008),
+	SPH_C64(0x2BC3C3959BE856C3), SPH_C64(0x97ECECDFC57B33EC),
+	SPH_C64(0x4BDBDB4DAB9096DB), SPH_C64(0xBEA1A1C05F1F61A1),
+	SPH_C64(0x0E8D8D9107831C8D), SPH_C64(0xF43D3DC87AC9F53D),
+	SPH_C64(0x6697975B33F1CC97), SPH_C64(0x0000000000000000),
+	SPH_C64(0x1BCFCFF983D436CF), SPH_C64(0xAC2B2B6E5687452B),
+	SPH_C64(0xC57676E1ECB39776), SPH_C64(0x328282E619B06482),
+	SPH_C64(0x7FD6D628B1A9FED6), SPH_C64(0x6C1B1BC33677D81B),
+	SPH_C64(0xEEB5B574775BC1B5), SPH_C64(0x86AFAFBE432911AF),
+	SPH_C64(0xB56A6A1DD4DF776A), SPH_C64(0x5D5050EAA00DBA50),
+	SPH_C64(0x094545578A4C1245), SPH_C64(0xEBF3F338FB18CBF3),
+	SPH_C64(0xC03030AD60F09D30), SPH_C64(0x9BEFEFC4C3742BEF),
+	SPH_C64(0xFC3F3FDA7EC3E53F), SPH_C64(0x495555C7AA1C9255),
+	SPH_C64(0xB2A2A2DB591079A2), SPH_C64(0x8FEAEAE9C96503EA),
+	SPH_C64(0x8965656ACAEC0F65), SPH_C64(0xD2BABA036968B9BA),
+	SPH_C64(0xBC2F2F4A5E93652F), SPH_C64(0x27C0C08E9DE74EC0),
+	SPH_C64(0x5FDEDE60A181BEDE), SPH_C64(0x701C1CFC386CE01C),
+	SPH_C64(0xD3FDFD46E72EBBFD), SPH_C64(0x294D4D1F9A64524D),
+	SPH_C64(0x7292927639E0E492), SPH_C64(0xC97575FAEABC8F75),
+	SPH_C64(0x180606360C1E3006), SPH_C64(0x128A8AAE0998248A),
+	SPH_C64(0xF2B2B24B7940F9B2), SPH_C64(0xBFE6E685D15963E6),
+	SPH_C64(0x380E0E7E1C36700E), SPH_C64(0x7C1F1FE73E63F81F),
+	SPH_C64(0x95626255C4F73762), SPH_C64(0x77D4D43AB5A3EED4),
+	SPH_C64(0x9AA8A8814D3229A8), SPH_C64(0x6296965231F4C496),
+	SPH_C64(0xC3F9F962EF3A9BF9), SPH_C64(0x33C5C5A397F666C5),
+	SPH_C64(0x942525104AB13525), SPH_C64(0x795959ABB220F259),
+	SPH_C64(0x2A8484D015AE5484), SPH_C64(0xD57272C5E4A7B772),
+	SPH_C64(0xE43939EC72DDD539), SPH_C64(0x2D4C4C1698615A4C),
+	SPH_C64(0x655E5E94BC3BCA5E), SPH_C64(0xFD78789FF085E778),
+	SPH_C64(0xE03838E570D8DD38), SPH_C64(0x0A8C8C980586148C),
+	SPH_C64(0x63D1D117BFB2C6D1), SPH_C64(0xAEA5A5E4570B41A5),
+	SPH_C64(0xAFE2E2A1D94D43E2), SPH_C64(0x9961614EC2F82F61),
+	SPH_C64(0xF6B3B3427B45F1B3), SPH_C64(0x8421213442A51521),
+	SPH_C64(0x4A9C9C0825D6949C), SPH_C64(0x781E1EEE3C66F01E),
+	SPH_C64(0x1143436186522243), SPH_C64(0x3BC7C7B193FC76C7),
+	SPH_C64(0xD7FCFC4FE52BB3FC), SPH_C64(0x1004042408142004),
+	SPH_C64(0x595151E3A208B251), SPH_C64(0x5E9999252FC7BC99),
+	SPH_C64(0xA96D6D22DAC44F6D), SPH_C64(0x340D0D651A39680D),
+	SPH_C64(0xCFFAFA79E93583FA), SPH_C64(0x5BDFDF69A384B6DF),
+	SPH_C64(0xE57E7EA9FC9BD77E), SPH_C64(0x9024241948B43D24),
+	SPH_C64(0xEC3B3BFE76D7C53B), SPH_C64(0x96ABAB9A4B3D31AB),
+	SPH_C64(0x1FCECEF081D13ECE), SPH_C64(0x4411119922558811),
+	SPH_C64(0x068F8F8303890C8F), SPH_C64(0x254E4E049C6B4A4E),
+	SPH_C64(0xE6B7B7667351D1B7), SPH_C64(0x8BEBEBE0CB600BEB),
+	SPH_C64(0xF03C3CC178CCFD3C), SPH_C64(0x3E8181FD1FBF7C81),
+	SPH_C64(0x6A94944035FED494), SPH_C64(0xFBF7F71CF30CEBF7),
+	SPH_C64(0xDEB9B9186F67A1B9), SPH_C64(0x4C13138B265F9813),
+	SPH_C64(0xB02C2C51589C7D2C), SPH_C64(0x6BD3D305BBB8D6D3),
+	SPH_C64(0xBBE7E78CD35C6BE7), SPH_C64(0xA56E6E39DCCB576E),
+	SPH_C64(0x37C4C4AA95F36EC4), SPH_C64(0x0C03031B060F1803),
+	SPH_C64(0x455656DCAC138A56), SPH_C64(0x0D44445E88491A44),
+	SPH_C64(0xE17F7FA0FE9EDF7F), SPH_C64(0x9EA9A9884F3721A9),
+	SPH_C64(0xA82A2A6754824D2A), SPH_C64(0xD6BBBB0A6B6DB1BB),
+	SPH_C64(0x23C1C1879FE246C1), SPH_C64(0x515353F1A602A253),
+	SPH_C64(0x57DCDC72A58BAEDC), SPH_C64(0x2C0B0B531627580B),
+	SPH_C64(0x4E9D9D0127D39C9D), SPH_C64(0xAD6C6C2BD8C1476C),
+	SPH_C64(0xC43131A462F59531), SPH_C64(0xCD7474F3E8B98774),
+	SPH_C64(0xFFF6F615F109E3F6), SPH_C64(0x0546464C8C430A46),
+	SPH_C64(0x8AACACA5452609AC), SPH_C64(0x1E8989B50F973C89),
+	SPH_C64(0x501414B42844A014), SPH_C64(0xA3E1E1BADF425BE1),
+	SPH_C64(0x581616A62C4EB016), SPH_C64(0xE83A3AF774D2CD3A),
+	SPH_C64(0xB9696906D2D06F69), SPH_C64(0x24090941122D4809),
+	SPH_C64(0xDD7070D7E0ADA770), SPH_C64(0xE2B6B66F7154D9B6),
+	SPH_C64(0x67D0D01EBDB7CED0), SPH_C64(0x93EDEDD6C77E3BED),
+	SPH_C64(0x17CCCCE285DB2ECC), SPH_C64(0x1542426884572A42),
+	SPH_C64(0x5A98982C2DC2B498), SPH_C64(0xAAA4A4ED550E49A4),
+	SPH_C64(0xA028287550885D28), SPH_C64(0x6D5C5C86B831DA5C),
+	SPH_C64(0xC7F8F86BED3F93F8), SPH_C64(0x228686C211A44486)
+};
+
+static const sph_u64 plain_T6[256] = {
+	SPH_C64(0x1818D83078C01860), SPH_C64(0x23232646AF05238C),
+	SPH_C64(0xC6C6B891F97EC63F), SPH_C64(0xE8E8FBCD6F13E887),
+	SPH_C64(0x8787CB13A14C8726), SPH_C64(0xB8B8116D62A9B8DA),
+	SPH_C64(0x0101090205080104), SPH_C64(0x4F4F0D9E6E424F21),
+	SPH_C64(0x36369B6CEEAD36D8), SPH_C64(0xA6A6FF510459A6A2),
+	SPH_C64(0xD2D20CB9BDDED26F), SPH_C64(0xF5F50EF706FBF5F3),
+	SPH_C64(0x797996F280EF79F9), SPH_C64(0x6F6F30DECE5F6FA1),
+	SPH_C64(0x91916D3FEFFC917E), SPH_C64(0x5252F8A407AA5255),
+	SPH_C64(0x606047C0FD27609D), SPH_C64(0xBCBC35657689BCCA),
+	SPH_C64(0x9B9B372BCDAC9B56), SPH_C64(0x8E8E8A018C048E02),
+	SPH_C64(0xA3A3D25B1571A3B6), SPH_C64(0x0C0C6C183C600C30),
+	SPH_C64(0x7B7B84F68AFF7BF1), SPH_C64(0x3535806AE1B535D4),
+	SPH_C64(0x1D1DF53A69E81D74), SPH_C64(0xE0E0B3DD4753E0A7),
+	SPH_C64(0xD7D721B3ACF6D77B), SPH_C64(0xC2C29C99ED5EC22F),
+	SPH_C64(0x2E2E435C966D2EB8), SPH_C64(0x4B4B29967A624B31),
+	SPH_C64(0xFEFE5DE121A3FEDF), SPH_C64(0x5757D5AE16825741),
+	SPH_C64(0x1515BD2A41A81554), SPH_C64(0x7777E8EEB69F77C1),
+	SPH_C64(0x3737926EEBA537DC), SPH_C64(0xE5E59ED7567BE5B3),
+	SPH_C64(0x9F9F1323D98C9F46), SPH_C64(0xF0F023FD17D3F0E7),
+	SPH_C64(0x4A4A20947F6A4A35), SPH_C64(0xDADA44A9959EDA4F),
+	SPH_C64(0x5858A2B025FA587D), SPH_C64(0xC9C9CF8FCA06C903),
+	SPH_C64(0x29297C528D5529A4), SPH_C64(0x0A0A5A1422500A28),
+	SPH_C64(0xB1B1507F4FE1B1FE), SPH_C64(0xA0A0C95D1A69A0BA),
+	SPH_C64(0x6B6B14D6DA7F6BB1), SPH_C64(0x8585D917AB5C852E),
+	SPH_C64(0xBDBD3C677381BDCE), SPH_C64(0x5D5D8FBA34D25D69),
+	SPH_C64(0x1010902050801040), SPH_C64(0xF4F407F503F3F4F7),
+	SPH_C64(0xCBCBDD8BC016CB0B), SPH_C64(0x3E3ED37CC6ED3EF8),
+	SPH_C64(0x05052D0A11280514), SPH_C64(0x676778CEE61F6781),
+	SPH_C64(0xE4E497D55373E4B7), SPH_C64(0x2727024EBB25279C),
+	SPH_C64(0x4141738258324119), SPH_C64(0x8B8BA70B9D2C8B16),
+	SPH_C64(0xA7A7F6530151A7A6), SPH_C64(0x7D7DB2FA94CF7DE9),
+	SPH_C64(0x95954937FBDC956E), SPH_C64(0xD8D856AD9F8ED847),
+	SPH_C64(0xFBFB70EB308BFBCB), SPH_C64(0xEEEECDC17123EE9F),
+	SPH_C64(0x7C7CBBF891C77CED), SPH_C64(0x666671CCE3176685),
+	SPH_C64(0xDDDD7BA78EA6DD53), SPH_C64(0x1717AF2E4BB8175C),
+	SPH_C64(0x4747458E46024701), SPH_C64(0x9E9E1A21DC849E42),
+	SPH_C64(0xCACAD489C51ECA0F), SPH_C64(0x2D2D585A99752DB4),
+	SPH_C64(0xBFBF2E637991BFC6), SPH_C64(0x07073F0E1B38071C),
+	SPH_C64(0xADADAC472301AD8E), SPH_C64(0x5A5AB0B42FEA5A75),
+	SPH_C64(0x8383EF1BB56C8336), SPH_C64(0x3333B666FF8533CC),
+	SPH_C64(0x63635CC6F23F6391), SPH_C64(0x020212040A100208),
+	SPH_C64(0xAAAA93493839AA92), SPH_C64(0x7171DEE2A8AF71D9),
+	SPH_C64(0xC8C8C68DCF0EC807), SPH_C64(0x1919D1327DC81964),
+	SPH_C64(0x49493B9270724939), SPH_C64(0xD9D95FAF9A86D943),
+	SPH_C64(0xF2F231F91DC3F2EF), SPH_C64(0xE3E3A8DB484BE3AB),
+	SPH_C64(0x5B5BB9B62AE25B71), SPH_C64(0x8888BC0D9234881A),
+	SPH_C64(0x9A9A3E29C8A49A52), SPH_C64(0x26260B4CBE2D2698),
+	SPH_C64(0x3232BF64FA8D32C8), SPH_C64(0xB0B0597D4AE9B0FA),
+	SPH_C64(0xE9E9F2CF6A1BE983), SPH_C64(0x0F0F771E33780F3C),
+	SPH_C64(0xD5D533B7A6E6D573), SPH_C64(0x8080F41DBA74803A),
+	SPH_C64(0xBEBE27617C99BEC2), SPH_C64(0xCDCDEB87DE26CD13),
+	SPH_C64(0x34348968E4BD34D0), SPH_C64(0x48483290757A483D),
+	SPH_C64(0xFFFF54E324ABFFDB), SPH_C64(0x7A7A8DF48FF77AF5),
+	SPH_C64(0x9090643DEAF4907A), SPH_C64(0x5F5F9DBE3EC25F61),
+	SPH_C64(0x20203D40A01D2080), SPH_C64(0x68680FD0D56768BD),
+	SPH_C64(0x1A1ACA3472D01A68), SPH_C64(0xAEAEB7412C19AE82),
+	SPH_C64(0xB4B47D755EC9B4EA), SPH_C64(0x5454CEA8199A544D),
+	SPH_C64(0x93937F3BE5EC9376), SPH_C64(0x22222F44AA0D2288),
+	SPH_C64(0x646463C8E907648D), SPH_C64(0xF1F12AFF12DBF1E3),
+	SPH_C64(0x7373CCE6A2BF73D1), SPH_C64(0x121282245A901248),
+	SPH_C64(0x40407A805D3A401D), SPH_C64(0x0808481028400820),
+	SPH_C64(0xC3C3959BE856C32B), SPH_C64(0xECECDFC57B33EC97),
+	SPH_C64(0xDBDB4DAB9096DB4B), SPH_C64(0xA1A1C05F1F61A1BE),
+	SPH_C64(0x8D8D9107831C8D0E), SPH_C64(0x3D3DC87AC9F53DF4),
+	SPH_C64(0x97975B33F1CC9766), SPH_C64(0x0000000000000000),
+	SPH_C64(0xCFCFF983D436CF1B), SPH_C64(0x2B2B6E5687452BAC),
+	SPH_C64(0x7676E1ECB39776C5), SPH_C64(0x8282E619B0648232),
+	SPH_C64(0xD6D628B1A9FED67F), SPH_C64(0x1B1BC33677D81B6C),
+	SPH_C64(0xB5B574775BC1B5EE), SPH_C64(0xAFAFBE432911AF86),
+	SPH_C64(0x6A6A1DD4DF776AB5), SPH_C64(0x5050EAA00DBA505D),
+	SPH_C64(0x4545578A4C124509), SPH_C64(0xF3F338FB18CBF3EB),
+	SPH_C64(0x3030AD60F09D30C0), SPH_C64(0xEFEFC4C3742BEF9B),
+	SPH_C64(0x3F3FDA7EC3E53FFC), SPH_C64(0x5555C7AA1C925549),
+	SPH_C64(0xA2A2DB591079A2B2), SPH_C64(0xEAEAE9C96503EA8F),
+	SPH_C64(0x65656ACAEC0F6589), SPH_C64(0xBABA036968B9BAD2),
+	SPH_C64(0x2F2F4A5E93652FBC), SPH_C64(0xC0C08E9DE74EC027),
+	SPH_C64(0xDEDE60A181BEDE5F), SPH_C64(0x1C1CFC386CE01C70),
+	SPH_C64(0xFDFD46E72EBBFDD3), SPH_C64(0x4D4D1F9A64524D29),
+	SPH_C64(0x92927639E0E49272), SPH_C64(0x7575FAEABC8F75C9),
+	SPH_C64(0x0606360C1E300618), SPH_C64(0x8A8AAE0998248A12),
+	SPH_C64(0xB2B24B7940F9B2F2), SPH_C64(0xE6E685D15963E6BF),
+	SPH_C64(0x0E0E7E1C36700E38), SPH_C64(0x1F1FE73E63F81F7C),
+	SPH_C64(0x626255C4F7376295), SPH_C64(0xD4D43AB5A3EED477),
+	SPH_C64(0xA8A8814D3229A89A), SPH_C64(0x96965231F4C49662),
+	SPH_C64(0xF9F962EF3A9BF9C3), SPH_C64(0xC5C5A397F666C533),
+	SPH_C64(0x2525104AB1352594), SPH_C64(0x5959ABB220F25979),
+	SPH_C64(0x8484D015AE54842A), SPH_C64(0x7272C5E4A7B772D5),
+	SPH_C64(0x3939EC72DDD539E4), SPH_C64(0x4C4C1698615A4C2D),
+	SPH_C64(0x5E5E94BC3BCA5E65), SPH_C64(0x78789FF085E778FD),
+	SPH_C64(0x3838E570D8DD38E0), SPH_C64(0x8C8C980586148C0A),
+	SPH_C64(0xD1D117BFB2C6D163), SPH_C64(0xA5A5E4570B41A5AE),
+	SPH_C64(0xE2E2A1D94D43E2AF), SPH_C64(0x61614EC2F82F6199),
+	SPH_C64(0xB3B3427B45F1B3F6), SPH_C64(0x21213442A5152184),
+	SPH_C64(0x9C9C0825D6949C4A), SPH_C64(0x1E1EEE3C66F01E78),
+	SPH_C64(0x4343618652224311), SPH_C64(0xC7C7B193FC76C73B),
+	SPH_C64(0xFCFC4FE52BB3FCD7), SPH_C64(0x0404240814200410),
+	SPH_C64(0x5151E3A208B25159), SPH_C64(0x9999252FC7BC995E),
+	SPH_C64(0x6D6D22DAC44F6DA9), SPH_C64(0x0D0D651A39680D34),
+	SPH_C64(0xFAFA79E93583FACF), SPH_C64(0xDFDF69A384B6DF5B),
+	SPH_C64(0x7E7EA9FC9BD77EE5), SPH_C64(0x24241948B43D2490),
+	SPH_C64(0x3B3BFE76D7C53BEC), SPH_C64(0xABAB9A4B3D31AB96),
+	SPH_C64(0xCECEF081D13ECE1F), SPH_C64(0x1111992255881144),
+	SPH_C64(0x8F8F8303890C8F06), SPH_C64(0x4E4E049C6B4A4E25),
+	SPH_C64(0xB7B7667351D1B7E6), SPH_C64(0xEBEBE0CB600BEB8B),
+	SPH_C64(0x3C3CC178CCFD3CF0), SPH_C64(0x8181FD1FBF7C813E),
+	SPH_C64(0x94944035FED4946A), SPH_C64(0xF7F71CF30CEBF7FB),
+	SPH_C64(0xB9B9186F67A1B9DE), SPH_C64(0x13138B265F98134C),
+	SPH_C64(0x2C2C51589C7D2CB0), SPH_C64(0xD3D305BBB8D6D36B),
+	SPH_C64(0xE7E78CD35C6BE7BB), SPH_C64(0x6E6E39DCCB576EA5),
+	SPH_C64(0xC4C4AA95F36EC437), SPH_C64(0x03031B060F18030C),
+	SPH_C64(0x5656DCAC138A5645), SPH_C64(0x44445E88491A440D),
+	SPH_C64(0x7F7FA0FE9EDF7FE1), SPH_C64(0xA9A9884F3721A99E),
+	SPH_C64(0x2A2A6754824D2AA8), SPH_C64(0xBBBB0A6B6DB1BBD6),
+	SPH_C64(0xC1C1879FE246C123), SPH_C64(0x5353F1A602A25351),
+	SPH_C64(0xDCDC72A58BAEDC57), SPH_C64(0x0B0B531627580B2C),
+	SPH_C64(0x9D9D0127D39C9D4E), SPH_C64(0x6C6C2BD8C1476CAD),
+	SPH_C64(0x3131A462F59531C4), SPH_C64(0x7474F3E8B98774CD),
+	SPH_C64(0xF6F615F109E3F6FF), SPH_C64(0x46464C8C430A4605),
+	SPH_C64(0xACACA5452609AC8A), SPH_C64(0x8989B50F973C891E),
+	SPH_C64(0x1414B42844A01450), SPH_C64(0xE1E1BADF425BE1A3),
+	SPH_C64(0x1616A62C4EB01658), SPH_C64(0x3A3AF774D2CD3AE8),
+	SPH_C64(0x696906D2D06F69B9), SPH_C64(0x090941122D480924),
+	SPH_C64(0x7070D7E0ADA770DD), SPH_C64(0xB6B66F7154D9B6E2),
+	SPH_C64(0xD0D01EBDB7CED067), SPH_C64(0xEDEDD6C77E3BED93),
+	SPH_C64(0xCCCCE285DB2ECC17), SPH_C64(0x42426884572A4215),
+	SPH_C64(0x98982C2DC2B4985A), SPH_C64(0xA4A4ED550E49A4AA),
+	SPH_C64(0x28287550885D28A0), SPH_C64(0x5C5C86B831DA5C6D),
+	SPH_C64(0xF8F86BED3F93F8C7), SPH_C64(0x8686C211A4448622)
+};
+
+static const sph_u64 plain_T7[256] = {
+	SPH_C64(0x18D83078C0186018), SPH_C64(0x232646AF05238C23),
+	SPH_C64(0xC6B891F97EC63FC6), SPH_C64(0xE8FBCD6F13E887E8),
+	SPH_C64(0x87CB13A14C872687), SPH_C64(0xB8116D62A9B8DAB8),
+	SPH_C64(0x0109020508010401), SPH_C64(0x4F0D9E6E424F214F),
+	SPH_C64(0x369B6CEEAD36D836), SPH_C64(0xA6FF510459A6A2A6),
+	SPH_C64(0xD20CB9BDDED26FD2), SPH_C64(0xF50EF706FBF5F3F5),
+	SPH_C64(0x7996F280EF79F979), SPH_C64(0x6F30DECE5F6FA16F),
+	SPH_C64(0x916D3FEFFC917E91), SPH_C64(0x52F8A407AA525552),
+	SPH_C64(0x6047C0FD27609D60), SPH_C64(0xBC35657689BCCABC),
+	SPH_C64(0x9B372BCDAC9B569B), SPH_C64(0x8E8A018C048E028E),
+	SPH_C64(0xA3D25B1571A3B6A3), SPH_C64(0x0C6C183C600C300C),
+	SPH_C64(0x7B84F68AFF7BF17B), SPH_C64(0x35806AE1B535D435),
+	SPH_C64(0x1DF53A69E81D741D), SPH_C64(0xE0B3DD4753E0A7E0),
+	SPH_C64(0xD721B3ACF6D77BD7), SPH_C64(0xC29C99ED5EC22FC2),
+	SPH_C64(0x2E435C966D2EB82E), SPH_C64(0x4B29967A624B314B),
+	SPH_C64(0xFE5DE121A3FEDFFE), SPH_C64(0x57D5AE1682574157),
+	SPH_C64(0x15BD2A41A8155415), SPH_C64(0x77E8EEB69F77C177),
+	SPH_C64(0x37926EEBA537DC37), SPH_C64(0xE59ED7567BE5B3E5),
+	SPH_C64(0x9F1323D98C9F469F), SPH_C64(0xF023FD17D3F0E7F0),
+	SPH_C64(0x4A20947F6A4A354A), SPH_C64(0xDA44A9959EDA4FDA),
+	SPH_C64(0x58A2B025FA587D58), SPH_C64(0xC9CF8FCA06C903C9),
+	SPH_C64(0x297C528D5529A429), SPH_C64(0x0A5A1422500A280A),
+	SPH_C64(0xB1507F4FE1B1FEB1), SPH_C64(0xA0C95D1A69A0BAA0),
+	SPH_C64(0x6B14D6DA7F6BB16B), SPH_C64(0x85D917AB5C852E85),
+	SPH_C64(0xBD3C677381BDCEBD), SPH_C64(0x5D8FBA34D25D695D),
+	SPH_C64(0x1090205080104010), SPH_C64(0xF407F503F3F4F7F4),
+	SPH_C64(0xCBDD8BC016CB0BCB), SPH_C64(0x3ED37CC6ED3EF83E),
+	SPH_C64(0x052D0A1128051405), SPH_C64(0x6778CEE61F678167),
+	SPH_C64(0xE497D55373E4B7E4), SPH_C64(0x27024EBB25279C27),
+	SPH_C64(0x4173825832411941), SPH_C64(0x8BA70B9D2C8B168B),
+	SPH_C64(0xA7F6530151A7A6A7), SPH_C64(0x7DB2FA94CF7DE97D),
+	SPH_C64(0x954937FBDC956E95), SPH_C64(0xD856AD9F8ED847D8),
+	SPH_C64(0xFB70EB308BFBCBFB), SPH_C64(0xEECDC17123EE9FEE),
+	SPH_C64(0x7CBBF891C77CED7C), SPH_C64(0x6671CCE317668566),
+	SPH_C64(0xDD7BA78EA6DD53DD), SPH_C64(0x17AF2E4BB8175C17),
+	SPH_C64(0x47458E4602470147), SPH_C64(0x9E1A21DC849E429E),
+	SPH_C64(0xCAD489C51ECA0FCA), SPH_C64(0x2D585A99752DB42D),
+	SPH_C64(0xBF2E637991BFC6BF), SPH_C64(0x073F0E1B38071C07),
+	SPH_C64(0xADAC472301AD8EAD), SPH_C64(0x5AB0B42FEA5A755A),
+	SPH_C64(0x83EF1BB56C833683), SPH_C64(0x33B666FF8533CC33),
+	SPH_C64(0x635CC6F23F639163), SPH_C64(0x0212040A10020802),
+	SPH_C64(0xAA93493839AA92AA), SPH_C64(0x71DEE2A8AF71D971),
+	SPH_C64(0xC8C68DCF0EC807C8), SPH_C64(0x19D1327DC8196419),
+	SPH_C64(0x493B927072493949), SPH_C64(0xD95FAF9A86D943D9),
+	SPH_C64(0xF231F91DC3F2EFF2), SPH_C64(0xE3A8DB484BE3ABE3),
+	SPH_C64(0x5BB9B62AE25B715B), SPH_C64(0x88BC0D9234881A88),
+	SPH_C64(0x9A3E29C8A49A529A), SPH_C64(0x260B4CBE2D269826),
+	SPH_C64(0x32BF64FA8D32C832), SPH_C64(0xB0597D4AE9B0FAB0),
+	SPH_C64(0xE9F2CF6A1BE983E9), SPH_C64(0x0F771E33780F3C0F),
+	SPH_C64(0xD533B7A6E6D573D5), SPH_C64(0x80F41DBA74803A80),
+	SPH_C64(0xBE27617C99BEC2BE), SPH_C64(0xCDEB87DE26CD13CD),
+	SPH_C64(0x348968E4BD34D034), SPH_C64(0x483290757A483D48),
+	SPH_C64(0xFF54E324ABFFDBFF), SPH_C64(0x7A8DF48FF77AF57A),
+	SPH_C64(0x90643DEAF4907A90), SPH_C64(0x5F9DBE3EC25F615F),
+	SPH_C64(0x203D40A01D208020), SPH_C64(0x680FD0D56768BD68),
+	SPH_C64(0x1ACA3472D01A681A), SPH_C64(0xAEB7412C19AE82AE),
+	SPH_C64(0xB47D755EC9B4EAB4), SPH_C64(0x54CEA8199A544D54),
+	SPH_C64(0x937F3BE5EC937693), SPH_C64(0x222F44AA0D228822),
+	SPH_C64(0x6463C8E907648D64), SPH_C64(0xF12AFF12DBF1E3F1),
+	SPH_C64(0x73CCE6A2BF73D173), SPH_C64(0x1282245A90124812),
+	SPH_C64(0x407A805D3A401D40), SPH_C64(0x0848102840082008),
+	SPH_C64(0xC3959BE856C32BC3), SPH_C64(0xECDFC57B33EC97EC),
+	SPH_C64(0xDB4DAB9096DB4BDB), SPH_C64(0xA1C05F1F61A1BEA1),
+	SPH_C64(0x8D9107831C8D0E8D), SPH_C64(0x3DC87AC9F53DF43D),
+	SPH_C64(0x975B33F1CC976697), SPH_C64(0x0000000000000000),
+	SPH_C64(0xCFF983D436CF1BCF), SPH_C64(0x2B6E5687452BAC2B),
+	SPH_C64(0x76E1ECB39776C576), SPH_C64(0x82E619B064823282),
+	SPH_C64(0xD628B1A9FED67FD6), SPH_C64(0x1BC33677D81B6C1B),
+	SPH_C64(0xB574775BC1B5EEB5), SPH_C64(0xAFBE432911AF86AF),
+	SPH_C64(0x6A1DD4DF776AB56A), SPH_C64(0x50EAA00DBA505D50),
+	SPH_C64(0x45578A4C12450945), SPH_C64(0xF338FB18CBF3EBF3),
+	SPH_C64(0x30AD60F09D30C030), SPH_C64(0xEFC4C3742BEF9BEF),
+	SPH_C64(0x3FDA7EC3E53FFC3F), SPH_C64(0x55C7AA1C92554955),
+	SPH_C64(0xA2DB591079A2B2A2), SPH_C64(0xEAE9C96503EA8FEA),
+	SPH_C64(0x656ACAEC0F658965), SPH_C64(0xBA036968B9BAD2BA),
+	SPH_C64(0x2F4A5E93652FBC2F), SPH_C64(0xC08E9DE74EC027C0),
+	SPH_C64(0xDE60A181BEDE5FDE), SPH_C64(0x1CFC386CE01C701C),
+	SPH_C64(0xFD46E72EBBFDD3FD), SPH_C64(0x4D1F9A64524D294D),
+	SPH_C64(0x927639E0E4927292), SPH_C64(0x75FAEABC8F75C975),
+	SPH_C64(0x06360C1E30061806), SPH_C64(0x8AAE0998248A128A),
+	SPH_C64(0xB24B7940F9B2F2B2), SPH_C64(0xE685D15963E6BFE6),
+	SPH_C64(0x0E7E1C36700E380E), SPH_C64(0x1FE73E63F81F7C1F),
+	SPH_C64(0x6255C4F737629562), SPH_C64(0xD43AB5A3EED477D4),
+	SPH_C64(0xA8814D3229A89AA8), SPH_C64(0x965231F4C4966296),
+	SPH_C64(0xF962EF3A9BF9C3F9), SPH_C64(0xC5A397F666C533C5),
+	SPH_C64(0x25104AB135259425), SPH_C64(0x59ABB220F2597959),
+	SPH_C64(0x84D015AE54842A84), SPH_C64(0x72C5E4A7B772D572),
+	SPH_C64(0x39EC72DDD539E439), SPH_C64(0x4C1698615A4C2D4C),
+	SPH_C64(0x5E94BC3BCA5E655E), SPH_C64(0x789FF085E778FD78),
+	SPH_C64(0x38E570D8DD38E038), SPH_C64(0x8C980586148C0A8C),
+	SPH_C64(0xD117BFB2C6D163D1), SPH_C64(0xA5E4570B41A5AEA5),
+	SPH_C64(0xE2A1D94D43E2AFE2), SPH_C64(0x614EC2F82F619961),
+	SPH_C64(0xB3427B45F1B3F6B3), SPH_C64(0x213442A515218421),
+	SPH_C64(0x9C0825D6949C4A9C), SPH_C64(0x1EEE3C66F01E781E),
+	SPH_C64(0x4361865222431143), SPH_C64(0xC7B193FC76C73BC7),
+	SPH_C64(0xFC4FE52BB3FCD7FC), SPH_C64(0x0424081420041004),
+	SPH_C64(0x51E3A208B2515951), SPH_C64(0x99252FC7BC995E99),
+	SPH_C64(0x6D22DAC44F6DA96D), SPH_C64(0x0D651A39680D340D),
+	SPH_C64(0xFA79E93583FACFFA), SPH_C64(0xDF69A384B6DF5BDF),
+	SPH_C64(0x7EA9FC9BD77EE57E), SPH_C64(0x241948B43D249024),
+	SPH_C64(0x3BFE76D7C53BEC3B), SPH_C64(0xAB9A4B3D31AB96AB),
+	SPH_C64(0xCEF081D13ECE1FCE), SPH_C64(0x1199225588114411),
+	SPH_C64(0x8F8303890C8F068F), SPH_C64(0x4E049C6B4A4E254E),
+	SPH_C64(0xB7667351D1B7E6B7), SPH_C64(0xEBE0CB600BEB8BEB),
+	SPH_C64(0x3CC178CCFD3CF03C), SPH_C64(0x81FD1FBF7C813E81),
+	SPH_C64(0x944035FED4946A94), SPH_C64(0xF71CF30CEBF7FBF7),
+	SPH_C64(0xB9186F67A1B9DEB9), SPH_C64(0x138B265F98134C13),
+	SPH_C64(0x2C51589C7D2CB02C), SPH_C64(0xD305BBB8D6D36BD3),
+	SPH_C64(0xE78CD35C6BE7BBE7), SPH_C64(0x6E39DCCB576EA56E),
+	SPH_C64(0xC4AA95F36EC437C4), SPH_C64(0x031B060F18030C03),
+	SPH_C64(0x56DCAC138A564556), SPH_C64(0x445E88491A440D44),
+	SPH_C64(0x7FA0FE9EDF7FE17F), SPH_C64(0xA9884F3721A99EA9),
+	SPH_C64(0x2A6754824D2AA82A), SPH_C64(0xBB0A6B6DB1BBD6BB),
+	SPH_C64(0xC1879FE246C123C1), SPH_C64(0x53F1A602A2535153),
+	SPH_C64(0xDC72A58BAEDC57DC), SPH_C64(0x0B531627580B2C0B),
+	SPH_C64(0x9D0127D39C9D4E9D), SPH_C64(0x6C2BD8C1476CAD6C),
+	SPH_C64(0x31A462F59531C431), SPH_C64(0x74F3E8B98774CD74),
+	SPH_C64(0xF615F109E3F6FFF6), SPH_C64(0x464C8C430A460546),
+	SPH_C64(0xACA5452609AC8AAC), SPH_C64(0x89B50F973C891E89),
+	SPH_C64(0x14B42844A0145014), SPH_C64(0xE1BADF425BE1A3E1),
+	SPH_C64(0x16A62C4EB0165816), SPH_C64(0x3AF774D2CD3AE83A),
+	SPH_C64(0x6906D2D06F69B969), SPH_C64(0x0941122D48092409),
+	SPH_C64(0x70D7E0ADA770DD70), SPH_C64(0xB66F7154D9B6E2B6),
+	SPH_C64(0xD01EBDB7CED067D0), SPH_C64(0xEDD6C77E3BED93ED),
+	SPH_C64(0xCCE285DB2ECC17CC), SPH_C64(0x426884572A421542),
+	SPH_C64(0x982C2DC2B4985A98), SPH_C64(0xA4ED550E49A4AAA4),
+	SPH_C64(0x287550885D28A028), SPH_C64(0x5C86B831DA5C6D5C),
+	SPH_C64(0xF86BED3F93F8C7F8), SPH_C64(0x86C211A444862286)
+};
+
+#endif
+
+/*
+ * Round constants (plain_RC): ten 64-bit words, each written with SPH_C64.
+ */
+static const sph_u64 plain_RC[10] = { /* presumably one word per round -- TODO confirm against the round loop */
+	SPH_C64(0x4F01B887E8C62318),
+	SPH_C64(0x52916F79F5D2A636),
+	SPH_C64(0x357B0CA38E9BBC60),
+	SPH_C64(0x57FE4B2EC2D7E01D),
+	SPH_C64(0xDA4AF09FE5377715),
+	SPH_C64(0x856BA0B10A29C958),
+	SPH_C64(0x67053ECBF4105DBD),
+	SPH_C64(0xD8957DA78B4127E4),
+	SPH_C64(0x9E4717DD667CEEFB),
+	SPH_C64(0x33835AAD07BF2DCA)
+};
+
+/* ====================================================================== */
+/*
+ * Constants for plain WHIRLPOOL-0 (first version).
+ */
+
+static const sph_u64 old0_T0[256] = { /* base 256-entry table; old0_T1..old0_T3 hold these same entries rotated left by 8, 16, 24 bits */
+	SPH_C64(0xD50F67D568B86868), SPH_C64(0xB71ECEB7D06DD0D0),
+	SPH_C64(0x60E00B60EB20EBEB), SPH_C64(0x876E45872B7D2B2B),
+	SPH_C64(0x75327A7548D84848), SPH_C64(0xD3019CD39DBA9D9D),
+	SPH_C64(0xDF1D77DF6ABE6A6A), SPH_C64(0x53977353E431E4E4),
+	SPH_C64(0x48A84B48E338E3E3), SPH_C64(0x15D27115A3F8A3A3),
+	SPH_C64(0x13DC8A1356FA5656), SPH_C64(0xBFFD7CBF819E8181),
+	SPH_C64(0x94B2CF947D877D7D), SPH_C64(0x122ADB12F10EF1F1),
+	SPH_C64(0xABD95CAB85928585), SPH_C64(0xDC1A84DC9EBF9E9E),
+	SPH_C64(0x9C517D9C2C742C2C), SPH_C64(0x8C8A048C8E8F8E8E),
+	SPH_C64(0x859FE78578887878), SPH_C64(0xC5D41EC5CA43CACA),
+	SPH_C64(0x4BAFB84B17391717), SPH_C64(0x37882137A9E6A9A9),
+	SPH_C64(0xF84E2FF861A36161), SPH_C64(0xA633E6A6D562D5D5),
+	SPH_C64(0x348FD2345DE75D5D), SPH_C64(0x275358270B1D0B0B),
+	SPH_C64(0x869814868C898C8C), SPH_C64(0xCCC1FDCC3C443C3C),
+	SPH_C64(0xB6E89FB677997777), SPH_C64(0x08E3B20851F35151),
+	SPH_C64(0xAA2F0DAA22662222), SPH_C64(0x57682A5742C64242),
+	SPH_C64(0xC3DAE5C33F413F3F), SPH_C64(0x19CE9A1954FC5454),
+	SPH_C64(0x5873325841C34141), SPH_C64(0xBAF474BA809D8080),
+	SPH_C64(0xDBE22EDBCC49CCCC), SPH_C64(0xA4C244A486978686),
+	SPH_C64(0x4542F145B3C8B3B3), SPH_C64(0x78D8C07818281818),
+	SPH_C64(0x96436D962E722E2E), SPH_C64(0x16D5821657F95757),
+	SPH_C64(0x1E36301E060A0606), SPH_C64(0xF75537F762A66262),
+	SPH_C64(0x0307F303F401F4F4), SPH_C64(0xEE9BADEE365A3636),
+	SPH_C64(0xB217C6B2D16ED1D1), SPH_C64(0xDA147FDA6BBD6B6B),
+	SPH_C64(0x77C3D8771B2D1B1B), SPH_C64(0xEC6A0FEC65AF6565),
+	SPH_C64(0xBCFA8FBC759F7575), SPH_C64(0x5090805010301010),
+	SPH_C64(0x95449E95DA73DADA), SPH_C64(0x703B727049DB4949),
+	SPH_C64(0xBE0B2DBE266A2626), SPH_C64(0x3A629B3AF916F9F9),
+	SPH_C64(0xC0DD16C0CB40CBCB), SPH_C64(0xE37117E366AA6666),
+	SPH_C64(0x5C8C6B5CE734E7E7), SPH_C64(0x6803B968BAD3BABA),
+	SPH_C64(0x2CB7192CAEEFAEAE), SPH_C64(0x0DEABA0D50F05050),
+	SPH_C64(0x07F8AA0752F65252), SPH_C64(0x3D9A313DABE0ABAB),
+	SPH_C64(0x112D2811050F0505), SPH_C64(0x1723D317F00DF0F0),
+	SPH_C64(0x396568390D170D0D), SPH_C64(0xA2CCBFA273957373),
+	SPH_C64(0xD7FEC5D73B4D3B3B), SPH_C64(0x14242014040C0404),
+	SPH_C64(0xA03D1DA020602020), SPH_C64(0x215DA321FE1FFEFE),
+	SPH_C64(0x8E7BA68EDD7ADDDD), SPH_C64(0x060EFB06F502F5F5),
+	SPH_C64(0x5E7DC95EB4C1B4B4), SPH_C64(0x3E9DC23E5FE15F5F),
+	SPH_C64(0x225A50220A1E0A0A), SPH_C64(0x5B74C15BB5C2B5B5),
+	SPH_C64(0xE78E4EE7C05DC0C0), SPH_C64(0x1AC9691AA0FDA0A0),
+	SPH_C64(0xA8DEAFA871937171), SPH_C64(0x0BE4410BA5F2A5A5),
+	SPH_C64(0x995875992D772D2D), SPH_C64(0xFD4727FD60A06060),
+	SPH_C64(0xA7C5B7A772967272), SPH_C64(0xE57FECE593A89393),
+	SPH_C64(0xDDECD5DD394B3939), SPH_C64(0x2848402808180808),
+	SPH_C64(0xB5EF6CB583988383), SPH_C64(0xA53415A521632121),
+	SPH_C64(0x3186DA315CE45C5C), SPH_C64(0xA1CB4CA187948787),
+	SPH_C64(0x4F50E14FB1CEB1B1), SPH_C64(0x47B35347E03DE0E0),
+	SPH_C64(0x0000000000000000), SPH_C64(0xE89556E8C358C3C3),
+	SPH_C64(0x5A82905A12361212), SPH_C64(0xEF6DFCEF91AE9191),
+	SPH_C64(0x98AE24988A838A8A), SPH_C64(0x0A12100A02060202),
+	SPH_C64(0x6CFCE06C1C241C1C), SPH_C64(0x59856359E637E6E6),
+	SPH_C64(0x4C57124C45CF4545), SPH_C64(0xED9C5EEDC25BC2C2),
+	SPH_C64(0xF3AA6EF3C451C4C4), SPH_C64(0x2E46BB2EFD1AFDFD),
+	SPH_C64(0x792E9179BFDCBFBF), SPH_C64(0x495E1A4944CC4444),
+	SPH_C64(0x1FC0611FA1FEA1A1), SPH_C64(0x61165A614CD44C4C),
+	SPH_C64(0xFFB685FF33553333), SPH_C64(0xF6A366F6C552C5C5),
+	SPH_C64(0xAED054AE84918484), SPH_C64(0xAF2605AF23652323),
+	SPH_C64(0x91BBC7917C847C7C), SPH_C64(0x4A59E94AB0CDB0B0),
+	SPH_C64(0xB11035B1256F2525), SPH_C64(0x41BDA841153F1515),
+	SPH_C64(0xE180B5E1355F3535), SPH_C64(0xD0066FD069BB6969),
+	SPH_C64(0x2454AB24FF1CFFFF), SPH_C64(0xFE40D4FE94A19494),
+	SPH_C64(0x641F52644DD74D4D), SPH_C64(0xADD7A7AD70907070),
+	SPH_C64(0x10DB7910A2FBA2A2), SPH_C64(0x29BE1129AFECAFAF),
+	SPH_C64(0xDEEB26DECD4ACDCD), SPH_C64(0xA928FEA9D667D6D6),
+	SPH_C64(0xC12B47C16CB46C6C), SPH_C64(0x5166D151B7C4B7B7),
+	SPH_C64(0x3F6B933FF815F8F8), SPH_C64(0x2D41482D091B0909),
+	SPH_C64(0x1838CB18F308F3F3), SPH_C64(0xE6781FE667A96767),
+	SPH_C64(0x0EED490EA4F1A4A4), SPH_C64(0x65E90365EA23EAEA),
+	SPH_C64(0x7BDF337BEC29ECEC), SPH_C64(0x546FD954B6C7B6B6),
+	SPH_C64(0xA33AEEA3D461D4D4), SPH_C64(0xBD0CDEBDD26BD2D2),
+	SPH_C64(0x44B4A044143C1414), SPH_C64(0x66EEF0661E221E1E),
+	SPH_C64(0x42BA5B42E13EE1E1), SPH_C64(0xB4193DB4246C2424),
+	SPH_C64(0xD8E5DDD838483838), SPH_C64(0xF9B87EF9C657C6C6),
+	SPH_C64(0x904D9690DB70DBDB), SPH_C64(0x7A29627A4BDD4B4B),
+	SPH_C64(0x8F8DF78F7A8E7A7A), SPH_C64(0xD2F7CDD23A4E3A3A),
+	SPH_C64(0x8160BE81DE7FDEDE), SPH_C64(0x3B94CA3B5EE25E5E),
+	SPH_C64(0x8469B684DF7CDFDF), SPH_C64(0xFB49DCFB95A29595),
+	SPH_C64(0x2B4FB32BFC19FCFC), SPH_C64(0x38933938AAE3AAAA),
+	SPH_C64(0xAC21F6ACD764D7D7), SPH_C64(0xD1F03ED1CE4FCECE),
+	SPH_C64(0x1B3F381B07090707), SPH_C64(0x337778330F110F0F),
+	SPH_C64(0xC9C8F5C93D473D3D), SPH_C64(0x25A2FA2558E85858),
+	SPH_C64(0xC83EA4C89AB39A9A), SPH_C64(0xC22CB4C298B59898),
+	SPH_C64(0xD60894D69CB99C9C), SPH_C64(0x1D31C31DF20BF2F2),
+	SPH_C64(0x01F65101A7F4A7A7), SPH_C64(0x5599885511331111),
+	SPH_C64(0x9BA9D79B7E827E7E), SPH_C64(0x9DA72C9D8B808B8B),
+	SPH_C64(0x5261225243C54343), SPH_C64(0x0F1B180F03050303),
+	SPH_C64(0x4DA1434DE23BE2E2), SPH_C64(0x8B72AE8BDC79DCDC),
+	SPH_C64(0x569E7B56E532E5E5), SPH_C64(0x404BF940B2CBB2B2),
+	SPH_C64(0x6B044A6B4ED24E4E), SPH_C64(0xFCB176FCC754C7C7),
+	SPH_C64(0xC4224FC46DB76D6D), SPH_C64(0x6AF21B6AE926E9E9),
+	SPH_C64(0xBB0225BB27692727), SPH_C64(0x5D7A3A5D40C04040),
+	SPH_C64(0x9F568E9FD875D8D8), SPH_C64(0xEB92A5EB37593737),
+	SPH_C64(0xE076E4E092AB9292), SPH_C64(0x89830C898F8C8F8F),
+	SPH_C64(0x0509080501030101), SPH_C64(0x69F5E8691D271D1D),
+	SPH_C64(0x02F1A20253F55353), SPH_C64(0xC6D3EDC63E423E3E),
+	SPH_C64(0x20ABF22059EB5959), SPH_C64(0xE28746E2C15EC1C1),
+	SPH_C64(0x6E0D426E4FD14F4F), SPH_C64(0xFABF8DFA32563232),
+	SPH_C64(0x4EA6B04E163A1616), SPH_C64(0x35798335FA13FAFA),
+	SPH_C64(0xB9F387B9749C7474), SPH_C64(0x30708B30FB10FBFB),
+	SPH_C64(0xF25C3FF263A56363), SPH_C64(0xD9138CD99FBC9F9F),
+	SPH_C64(0xE489BDE4345C3434), SPH_C64(0x72CAD0721A2E1A1A),
+	SPH_C64(0x82674D822A7E2A2A), SPH_C64(0x2FB0EA2F5AEE5A5A),
+	SPH_C64(0x83911C838D8A8D8D), SPH_C64(0xCACF06CAC946C9C9),
+	SPH_C64(0xD4F936D4CF4CCFCF), SPH_C64(0x0915E309F607F6F6),
+	SPH_C64(0xEA64F4EA90AD9090), SPH_C64(0x88755D8828782828),
+	SPH_C64(0x92BC349288858888), SPH_C64(0xCD37ACCD9BB09B9B),
+	SPH_C64(0xF5A495F531533131), SPH_C64(0x367E70360E120E0E),
+	SPH_C64(0x733C8173BDDABDBD), SPH_C64(0x7F206A7F4ADE4A4A),
+	SPH_C64(0x6FFB136FE825E8E8), SPH_C64(0xF452C4F496A79696),
+	SPH_C64(0x04FF5904A6F7A6A6), SPH_C64(0x3C6C603C0C140C0C),
+	SPH_C64(0xCFC60ECFC845C8C8), SPH_C64(0x8096EF80798B7979),
+	SPH_C64(0x76358976BCD9BCBC), SPH_C64(0x7C27997CBEDFBEBE),
+	SPH_C64(0x74C42B74EF2CEFEF), SPH_C64(0xCB3957CB6EB26E6E),
+	SPH_C64(0x434C0A4346CA4646), SPH_C64(0xF15BCCF197A49797),
+	SPH_C64(0x2AB9E22A5BED5B5B), SPH_C64(0x7ED63B7EED2AEDED),
+	SPH_C64(0x7DD1C87D192B1919), SPH_C64(0x9A5F869AD976D9D9),
+	SPH_C64(0x26A50926ACE9ACAC), SPH_C64(0xC725BCC799B69999),
+	SPH_C64(0x32812932A8E5A8A8), SPH_C64(0x8D7C558D297B2929),
+	SPH_C64(0xE96307E964AC6464), SPH_C64(0x63E7F8631F211F1F),
+	SPH_C64(0x23AC0123ADEAADAD), SPH_C64(0x1CC7921C55FF5555),
+	SPH_C64(0x5F8B985F13351313), SPH_C64(0x6D0AB16DBBD0BBBB),
+	SPH_C64(0x0C1CEB0CF704F7F7), SPH_C64(0xCE305FCE6FB16F6F),
+	SPH_C64(0x6718A167B9D6B9B9), SPH_C64(0x4645024647C94747),
+	SPH_C64(0x934A65932F712F2F), SPH_C64(0x71CD2371EE2FEEEE),
+	SPH_C64(0x6211A962B8D5B8B8), SPH_C64(0x8A84FF8A7B8D7B7B),
+	SPH_C64(0x97B53C9789868989), SPH_C64(0xF0AD9DF030503030),
+	SPH_C64(0xB805D6B8D368D3D3), SPH_C64(0x9EA0DF9E7F817F7F),
+	SPH_C64(0xB3E197B3769A7676), SPH_C64(0xB0E664B0829B8282)
+};
+
+#if !SPH_SMALL_FOOTPRINT_WHIRLPOOL
+
+static const sph_u64 old0_T1[256] = { /* entry i is old0_T0[i] rotated left by 8 bits (spot-checked); built only when SPH_SMALL_FOOTPRINT_WHIRLPOOL is false */
+	SPH_C64(0x0F67D568B86868D5), SPH_C64(0x1ECEB7D06DD0D0B7),
+	SPH_C64(0xE00B60EB20EBEB60), SPH_C64(0x6E45872B7D2B2B87),
+	SPH_C64(0x327A7548D8484875), SPH_C64(0x019CD39DBA9D9DD3),
+	SPH_C64(0x1D77DF6ABE6A6ADF), SPH_C64(0x977353E431E4E453),
+	SPH_C64(0xA84B48E338E3E348), SPH_C64(0xD27115A3F8A3A315),
+	SPH_C64(0xDC8A1356FA565613), SPH_C64(0xFD7CBF819E8181BF),
+	SPH_C64(0xB2CF947D877D7D94), SPH_C64(0x2ADB12F10EF1F112),
+	SPH_C64(0xD95CAB85928585AB), SPH_C64(0x1A84DC9EBF9E9EDC),
+	SPH_C64(0x517D9C2C742C2C9C), SPH_C64(0x8A048C8E8F8E8E8C),
+	SPH_C64(0x9FE7857888787885), SPH_C64(0xD41EC5CA43CACAC5),
+	SPH_C64(0xAFB84B173917174B), SPH_C64(0x882137A9E6A9A937),
+	SPH_C64(0x4E2FF861A36161F8), SPH_C64(0x33E6A6D562D5D5A6),
+	SPH_C64(0x8FD2345DE75D5D34), SPH_C64(0x5358270B1D0B0B27),
+	SPH_C64(0x9814868C898C8C86), SPH_C64(0xC1FDCC3C443C3CCC),
+	SPH_C64(0xE89FB677997777B6), SPH_C64(0xE3B20851F3515108),
+	SPH_C64(0x2F0DAA22662222AA), SPH_C64(0x682A5742C6424257),
+	SPH_C64(0xDAE5C33F413F3FC3), SPH_C64(0xCE9A1954FC545419),
+	SPH_C64(0x73325841C3414158), SPH_C64(0xF474BA809D8080BA),
+	SPH_C64(0xE22EDBCC49CCCCDB), SPH_C64(0xC244A486978686A4),
+	SPH_C64(0x42F145B3C8B3B345), SPH_C64(0xD8C0781828181878),
+	SPH_C64(0x436D962E722E2E96), SPH_C64(0xD5821657F9575716),
+	SPH_C64(0x36301E060A06061E), SPH_C64(0x5537F762A66262F7),
+	SPH_C64(0x07F303F401F4F403), SPH_C64(0x9BADEE365A3636EE),
+	SPH_C64(0x17C6B2D16ED1D1B2), SPH_C64(0x147FDA6BBD6B6BDA),
+	SPH_C64(0xC3D8771B2D1B1B77), SPH_C64(0x6A0FEC65AF6565EC),
+	SPH_C64(0xFA8FBC759F7575BC), SPH_C64(0x9080501030101050),
+	SPH_C64(0x449E95DA73DADA95), SPH_C64(0x3B727049DB494970),
+	SPH_C64(0x0B2DBE266A2626BE), SPH_C64(0x629B3AF916F9F93A),
+	SPH_C64(0xDD16C0CB40CBCBC0), SPH_C64(0x7117E366AA6666E3),
+	SPH_C64(0x8C6B5CE734E7E75C), SPH_C64(0x03B968BAD3BABA68),
+	SPH_C64(0xB7192CAEEFAEAE2C), SPH_C64(0xEABA0D50F050500D),
+	SPH_C64(0xF8AA0752F6525207), SPH_C64(0x9A313DABE0ABAB3D),
+	SPH_C64(0x2D2811050F050511), SPH_C64(0x23D317F00DF0F017),
+	SPH_C64(0x6568390D170D0D39), SPH_C64(0xCCBFA273957373A2),
+	SPH_C64(0xFEC5D73B4D3B3BD7), SPH_C64(0x242014040C040414),
+	SPH_C64(0x3D1DA020602020A0), SPH_C64(0x5DA321FE1FFEFE21),
+	SPH_C64(0x7BA68EDD7ADDDD8E), SPH_C64(0x0EFB06F502F5F506),
+	SPH_C64(0x7DC95EB4C1B4B45E), SPH_C64(0x9DC23E5FE15F5F3E),
+	SPH_C64(0x5A50220A1E0A0A22), SPH_C64(0x74C15BB5C2B5B55B),
+	SPH_C64(0x8E4EE7C05DC0C0E7), SPH_C64(0xC9691AA0FDA0A01A),
+	SPH_C64(0xDEAFA871937171A8), SPH_C64(0xE4410BA5F2A5A50B),
+	SPH_C64(0x5875992D772D2D99), SPH_C64(0x4727FD60A06060FD),
+	SPH_C64(0xC5B7A772967272A7), SPH_C64(0x7FECE593A89393E5),
+	SPH_C64(0xECD5DD394B3939DD), SPH_C64(0x4840280818080828),
+	SPH_C64(0xEF6CB583988383B5), SPH_C64(0x3415A521632121A5),
+	SPH_C64(0x86DA315CE45C5C31), SPH_C64(0xCB4CA187948787A1),
+	SPH_C64(0x50E14FB1CEB1B14F), SPH_C64(0xB35347E03DE0E047),
+	SPH_C64(0x0000000000000000), SPH_C64(0x9556E8C358C3C3E8),
+	SPH_C64(0x82905A123612125A), SPH_C64(0x6DFCEF91AE9191EF),
+	SPH_C64(0xAE24988A838A8A98), SPH_C64(0x12100A020602020A),
+	SPH_C64(0xFCE06C1C241C1C6C), SPH_C64(0x856359E637E6E659),
+	SPH_C64(0x57124C45CF45454C), SPH_C64(0x9C5EEDC25BC2C2ED),
+	SPH_C64(0xAA6EF3C451C4C4F3), SPH_C64(0x46BB2EFD1AFDFD2E),
+	SPH_C64(0x2E9179BFDCBFBF79), SPH_C64(0x5E1A4944CC444449),
+	SPH_C64(0xC0611FA1FEA1A11F), SPH_C64(0x165A614CD44C4C61),
+	SPH_C64(0xB685FF33553333FF), SPH_C64(0xA366F6C552C5C5F6),
+	SPH_C64(0xD054AE84918484AE), SPH_C64(0x2605AF23652323AF),
+	SPH_C64(0xBBC7917C847C7C91), SPH_C64(0x59E94AB0CDB0B04A),
+	SPH_C64(0x1035B1256F2525B1), SPH_C64(0xBDA841153F151541),
+	SPH_C64(0x80B5E1355F3535E1), SPH_C64(0x066FD069BB6969D0),
+	SPH_C64(0x54AB24FF1CFFFF24), SPH_C64(0x40D4FE94A19494FE),
+	SPH_C64(0x1F52644DD74D4D64), SPH_C64(0xD7A7AD70907070AD),
+	SPH_C64(0xDB7910A2FBA2A210), SPH_C64(0xBE1129AFECAFAF29),
+	SPH_C64(0xEB26DECD4ACDCDDE), SPH_C64(0x28FEA9D667D6D6A9),
+	SPH_C64(0x2B47C16CB46C6CC1), SPH_C64(0x66D151B7C4B7B751),
+	SPH_C64(0x6B933FF815F8F83F), SPH_C64(0x41482D091B09092D),
+	SPH_C64(0x38CB18F308F3F318), SPH_C64(0x781FE667A96767E6),
+	SPH_C64(0xED490EA4F1A4A40E), SPH_C64(0xE90365EA23EAEA65),
+	SPH_C64(0xDF337BEC29ECEC7B), SPH_C64(0x6FD954B6C7B6B654),
+	SPH_C64(0x3AEEA3D461D4D4A3), SPH_C64(0x0CDEBDD26BD2D2BD),
+	SPH_C64(0xB4A044143C141444), SPH_C64(0xEEF0661E221E1E66),
+	SPH_C64(0xBA5B42E13EE1E142), SPH_C64(0x193DB4246C2424B4),
+	SPH_C64(0xE5DDD838483838D8), SPH_C64(0xB87EF9C657C6C6F9),
+	SPH_C64(0x4D9690DB70DBDB90), SPH_C64(0x29627A4BDD4B4B7A),
+	SPH_C64(0x8DF78F7A8E7A7A8F), SPH_C64(0xF7CDD23A4E3A3AD2),
+	SPH_C64(0x60BE81DE7FDEDE81), SPH_C64(0x94CA3B5EE25E5E3B),
+	SPH_C64(0x69B684DF7CDFDF84), SPH_C64(0x49DCFB95A29595FB),
+	SPH_C64(0x4FB32BFC19FCFC2B), SPH_C64(0x933938AAE3AAAA38),
+	SPH_C64(0x21F6ACD764D7D7AC), SPH_C64(0xF03ED1CE4FCECED1),
+	SPH_C64(0x3F381B070907071B), SPH_C64(0x7778330F110F0F33),
+	SPH_C64(0xC8F5C93D473D3DC9), SPH_C64(0xA2FA2558E8585825),
+	SPH_C64(0x3EA4C89AB39A9AC8), SPH_C64(0x2CB4C298B59898C2),
+	SPH_C64(0x0894D69CB99C9CD6), SPH_C64(0x31C31DF20BF2F21D),
+	SPH_C64(0xF65101A7F4A7A701), SPH_C64(0x9988551133111155),
+	SPH_C64(0xA9D79B7E827E7E9B), SPH_C64(0xA72C9D8B808B8B9D),
+	SPH_C64(0x61225243C5434352), SPH_C64(0x1B180F030503030F),
+	SPH_C64(0xA1434DE23BE2E24D), SPH_C64(0x72AE8BDC79DCDC8B),
+	SPH_C64(0x9E7B56E532E5E556), SPH_C64(0x4BF940B2CBB2B240),
+	SPH_C64(0x044A6B4ED24E4E6B), SPH_C64(0xB176FCC754C7C7FC),
+	SPH_C64(0x224FC46DB76D6DC4), SPH_C64(0xF21B6AE926E9E96A),
+	SPH_C64(0x0225BB27692727BB), SPH_C64(0x7A3A5D40C040405D),
+	SPH_C64(0x568E9FD875D8D89F), SPH_C64(0x92A5EB37593737EB),
+	SPH_C64(0x76E4E092AB9292E0), SPH_C64(0x830C898F8C8F8F89),
+	SPH_C64(0x0908050103010105), SPH_C64(0xF5E8691D271D1D69),
+	SPH_C64(0xF1A20253F5535302), SPH_C64(0xD3EDC63E423E3EC6),
+	SPH_C64(0xABF22059EB595920), SPH_C64(0x8746E2C15EC1C1E2),
+	SPH_C64(0x0D426E4FD14F4F6E), SPH_C64(0xBF8DFA32563232FA),
+	SPH_C64(0xA6B04E163A16164E), SPH_C64(0x798335FA13FAFA35),
+	SPH_C64(0xF387B9749C7474B9), SPH_C64(0x708B30FB10FBFB30),
+	SPH_C64(0x5C3FF263A56363F2), SPH_C64(0x138CD99FBC9F9FD9),
+	SPH_C64(0x89BDE4345C3434E4), SPH_C64(0xCAD0721A2E1A1A72),
+	SPH_C64(0x674D822A7E2A2A82), SPH_C64(0xB0EA2F5AEE5A5A2F),
+	SPH_C64(0x911C838D8A8D8D83), SPH_C64(0xCF06CAC946C9C9CA),
+	SPH_C64(0xF936D4CF4CCFCFD4), SPH_C64(0x15E309F607F6F609),
+	SPH_C64(0x64F4EA90AD9090EA), SPH_C64(0x755D882878282888),
+	SPH_C64(0xBC34928885888892), SPH_C64(0x37ACCD9BB09B9BCD),
+	SPH_C64(0xA495F531533131F5), SPH_C64(0x7E70360E120E0E36),
+	SPH_C64(0x3C8173BDDABDBD73), SPH_C64(0x206A7F4ADE4A4A7F),
+	SPH_C64(0xFB136FE825E8E86F), SPH_C64(0x52C4F496A79696F4),
+	SPH_C64(0xFF5904A6F7A6A604), SPH_C64(0x6C603C0C140C0C3C),
+	SPH_C64(0xC60ECFC845C8C8CF), SPH_C64(0x96EF80798B797980),
+	SPH_C64(0x358976BCD9BCBC76), SPH_C64(0x27997CBEDFBEBE7C),
+	SPH_C64(0xC42B74EF2CEFEF74), SPH_C64(0x3957CB6EB26E6ECB),
+	SPH_C64(0x4C0A4346CA464643), SPH_C64(0x5BCCF197A49797F1),
+	SPH_C64(0xB9E22A5BED5B5B2A), SPH_C64(0xD63B7EED2AEDED7E),
+	SPH_C64(0xD1C87D192B19197D), SPH_C64(0x5F869AD976D9D99A),
+	SPH_C64(0xA50926ACE9ACAC26), SPH_C64(0x25BCC799B69999C7),
+	SPH_C64(0x812932A8E5A8A832), SPH_C64(0x7C558D297B29298D),
+	SPH_C64(0x6307E964AC6464E9), SPH_C64(0xE7F8631F211F1F63),
+	SPH_C64(0xAC0123ADEAADAD23), SPH_C64(0xC7921C55FF55551C),
+	SPH_C64(0x8B985F133513135F), SPH_C64(0x0AB16DBBD0BBBB6D),
+	SPH_C64(0x1CEB0CF704F7F70C), SPH_C64(0x305FCE6FB16F6FCE),
+	SPH_C64(0x18A167B9D6B9B967), SPH_C64(0x45024647C9474746),
+	SPH_C64(0x4A65932F712F2F93), SPH_C64(0xCD2371EE2FEEEE71),
+	SPH_C64(0x11A962B8D5B8B862), SPH_C64(0x84FF8A7B8D7B7B8A),
+	SPH_C64(0xB53C978986898997), SPH_C64(0xAD9DF030503030F0),
+	SPH_C64(0x05D6B8D368D3D3B8), SPH_C64(0xA0DF9E7F817F7F9E),
+	SPH_C64(0xE197B3769A7676B3), SPH_C64(0xE664B0829B8282B0)
+};
+
+static const sph_u64 old0_T2[256] = { /* entry i is old0_T0[i] rotated left by 16 bits (spot-checked); built only when SPH_SMALL_FOOTPRINT_WHIRLPOOL is false */
+	SPH_C64(0x67D568B86868D50F), SPH_C64(0xCEB7D06DD0D0B71E),
+	SPH_C64(0x0B60EB20EBEB60E0), SPH_C64(0x45872B7D2B2B876E),
+	SPH_C64(0x7A7548D848487532), SPH_C64(0x9CD39DBA9D9DD301),
+	SPH_C64(0x77DF6ABE6A6ADF1D), SPH_C64(0x7353E431E4E45397),
+	SPH_C64(0x4B48E338E3E348A8), SPH_C64(0x7115A3F8A3A315D2),
+	SPH_C64(0x8A1356FA565613DC), SPH_C64(0x7CBF819E8181BFFD),
+	SPH_C64(0xCF947D877D7D94B2), SPH_C64(0xDB12F10EF1F1122A),
+	SPH_C64(0x5CAB85928585ABD9), SPH_C64(0x84DC9EBF9E9EDC1A),
+	SPH_C64(0x7D9C2C742C2C9C51), SPH_C64(0x048C8E8F8E8E8C8A),
+	SPH_C64(0xE78578887878859F), SPH_C64(0x1EC5CA43CACAC5D4),
+	SPH_C64(0xB84B173917174BAF), SPH_C64(0x2137A9E6A9A93788),
+	SPH_C64(0x2FF861A36161F84E), SPH_C64(0xE6A6D562D5D5A633),
+	SPH_C64(0xD2345DE75D5D348F), SPH_C64(0x58270B1D0B0B2753),
+	SPH_C64(0x14868C898C8C8698), SPH_C64(0xFDCC3C443C3CCCC1),
+	SPH_C64(0x9FB677997777B6E8), SPH_C64(0xB20851F3515108E3),
+	SPH_C64(0x0DAA22662222AA2F), SPH_C64(0x2A5742C642425768),
+	SPH_C64(0xE5C33F413F3FC3DA), SPH_C64(0x9A1954FC545419CE),
+	SPH_C64(0x325841C341415873), SPH_C64(0x74BA809D8080BAF4),
+	SPH_C64(0x2EDBCC49CCCCDBE2), SPH_C64(0x44A486978686A4C2),
+	SPH_C64(0xF145B3C8B3B34542), SPH_C64(0xC0781828181878D8),
+	SPH_C64(0x6D962E722E2E9643), SPH_C64(0x821657F9575716D5),
+	SPH_C64(0x301E060A06061E36), SPH_C64(0x37F762A66262F755),
+	SPH_C64(0xF303F401F4F40307), SPH_C64(0xADEE365A3636EE9B),
+	SPH_C64(0xC6B2D16ED1D1B217), SPH_C64(0x7FDA6BBD6B6BDA14),
+	SPH_C64(0xD8771B2D1B1B77C3), SPH_C64(0x0FEC65AF6565EC6A),
+	SPH_C64(0x8FBC759F7575BCFA), SPH_C64(0x8050103010105090),
+	SPH_C64(0x9E95DA73DADA9544), SPH_C64(0x727049DB4949703B),
+	SPH_C64(0x2DBE266A2626BE0B), SPH_C64(0x9B3AF916F9F93A62),
+	SPH_C64(0x16C0CB40CBCBC0DD), SPH_C64(0x17E366AA6666E371),
+	SPH_C64(0x6B5CE734E7E75C8C), SPH_C64(0xB968BAD3BABA6803),
+	SPH_C64(0x192CAEEFAEAE2CB7), SPH_C64(0xBA0D50F050500DEA),
+	SPH_C64(0xAA0752F6525207F8), SPH_C64(0x313DABE0ABAB3D9A),
+	SPH_C64(0x2811050F0505112D), SPH_C64(0xD317F00DF0F01723),
+	SPH_C64(0x68390D170D0D3965), SPH_C64(0xBFA273957373A2CC),
+	SPH_C64(0xC5D73B4D3B3BD7FE), SPH_C64(0x2014040C04041424),
+	SPH_C64(0x1DA020602020A03D), SPH_C64(0xA321FE1FFEFE215D),
+	SPH_C64(0xA68EDD7ADDDD8E7B), SPH_C64(0xFB06F502F5F5060E),
+	SPH_C64(0xC95EB4C1B4B45E7D), SPH_C64(0xC23E5FE15F5F3E9D),
+	SPH_C64(0x50220A1E0A0A225A), SPH_C64(0xC15BB5C2B5B55B74),
+	SPH_C64(0x4EE7C05DC0C0E78E), SPH_C64(0x691AA0FDA0A01AC9),
+	SPH_C64(0xAFA871937171A8DE), SPH_C64(0x410BA5F2A5A50BE4),
+	SPH_C64(0x75992D772D2D9958), SPH_C64(0x27FD60A06060FD47),
+	SPH_C64(0xB7A772967272A7C5), SPH_C64(0xECE593A89393E57F),
+	SPH_C64(0xD5DD394B3939DDEC), SPH_C64(0x4028081808082848),
+	SPH_C64(0x6CB583988383B5EF), SPH_C64(0x15A521632121A534),
+	SPH_C64(0xDA315CE45C5C3186), SPH_C64(0x4CA187948787A1CB),
+	SPH_C64(0xE14FB1CEB1B14F50), SPH_C64(0x5347E03DE0E047B3),
+	SPH_C64(0x0000000000000000), SPH_C64(0x56E8C358C3C3E895),
+	SPH_C64(0x905A123612125A82), SPH_C64(0xFCEF91AE9191EF6D),
+	SPH_C64(0x24988A838A8A98AE), SPH_C64(0x100A020602020A12),
+	SPH_C64(0xE06C1C241C1C6CFC), SPH_C64(0x6359E637E6E65985),
+	SPH_C64(0x124C45CF45454C57), SPH_C64(0x5EEDC25BC2C2ED9C),
+	SPH_C64(0x6EF3C451C4C4F3AA), SPH_C64(0xBB2EFD1AFDFD2E46),
+	SPH_C64(0x9179BFDCBFBF792E), SPH_C64(0x1A4944CC4444495E),
+	SPH_C64(0x611FA1FEA1A11FC0), SPH_C64(0x5A614CD44C4C6116),
+	SPH_C64(0x85FF33553333FFB6), SPH_C64(0x66F6C552C5C5F6A3),
+	SPH_C64(0x54AE84918484AED0), SPH_C64(0x05AF23652323AF26),
+	SPH_C64(0xC7917C847C7C91BB), SPH_C64(0xE94AB0CDB0B04A59),
+	SPH_C64(0x35B1256F2525B110), SPH_C64(0xA841153F151541BD),
+	SPH_C64(0xB5E1355F3535E180), SPH_C64(0x6FD069BB6969D006),
+	SPH_C64(0xAB24FF1CFFFF2454), SPH_C64(0xD4FE94A19494FE40),
+	SPH_C64(0x52644DD74D4D641F), SPH_C64(0xA7AD70907070ADD7),
+	SPH_C64(0x7910A2FBA2A210DB), SPH_C64(0x1129AFECAFAF29BE),
+	SPH_C64(0x26DECD4ACDCDDEEB), SPH_C64(0xFEA9D667D6D6A928),
+	SPH_C64(0x47C16CB46C6CC12B), SPH_C64(0xD151B7C4B7B75166),
+	SPH_C64(0x933FF815F8F83F6B), SPH_C64(0x482D091B09092D41),
+	SPH_C64(0xCB18F308F3F31838), SPH_C64(0x1FE667A96767E678),
+	SPH_C64(0x490EA4F1A4A40EED), SPH_C64(0x0365EA23EAEA65E9),
+	SPH_C64(0x337BEC29ECEC7BDF), SPH_C64(0xD954B6C7B6B6546F),
+	SPH_C64(0xEEA3D461D4D4A33A), SPH_C64(0xDEBDD26BD2D2BD0C),
+	SPH_C64(0xA044143C141444B4), SPH_C64(0xF0661E221E1E66EE),
+	SPH_C64(0x5B42E13EE1E142BA), SPH_C64(0x3DB4246C2424B419),
+	SPH_C64(0xDDD838483838D8E5), SPH_C64(0x7EF9C657C6C6F9B8),
+	SPH_C64(0x9690DB70DBDB904D), SPH_C64(0x627A4BDD4B4B7A29),
+	SPH_C64(0xF78F7A8E7A7A8F8D), SPH_C64(0xCDD23A4E3A3AD2F7),
+	SPH_C64(0xBE81DE7FDEDE8160), SPH_C64(0xCA3B5EE25E5E3B94),
+	SPH_C64(0xB684DF7CDFDF8469), SPH_C64(0xDCFB95A29595FB49),
+	SPH_C64(0xB32BFC19FCFC2B4F), SPH_C64(0x3938AAE3AAAA3893),
+	SPH_C64(0xF6ACD764D7D7AC21), SPH_C64(0x3ED1CE4FCECED1F0),
+	SPH_C64(0x381B070907071B3F), SPH_C64(0x78330F110F0F3377),
+	SPH_C64(0xF5C93D473D3DC9C8), SPH_C64(0xFA2558E8585825A2),
+	SPH_C64(0xA4C89AB39A9AC83E), SPH_C64(0xB4C298B59898C22C),
+	SPH_C64(0x94D69CB99C9CD608), SPH_C64(0xC31DF20BF2F21D31),
+	SPH_C64(0x5101A7F4A7A701F6), SPH_C64(0x8855113311115599),
+	SPH_C64(0xD79B7E827E7E9BA9), SPH_C64(0x2C9D8B808B8B9DA7),
+	SPH_C64(0x225243C543435261), SPH_C64(0x180F030503030F1B),
+	SPH_C64(0x434DE23BE2E24DA1), SPH_C64(0xAE8BDC79DCDC8B72),
+	SPH_C64(0x7B56E532E5E5569E), SPH_C64(0xF940B2CBB2B2404B),
+	SPH_C64(0x4A6B4ED24E4E6B04), SPH_C64(0x76FCC754C7C7FCB1),
+	SPH_C64(0x4FC46DB76D6DC422), SPH_C64(0x1B6AE926E9E96AF2),
+	SPH_C64(0x25BB27692727BB02), SPH_C64(0x3A5D40C040405D7A),
+	SPH_C64(0x8E9FD875D8D89F56), SPH_C64(0xA5EB37593737EB92),
+	SPH_C64(0xE4E092AB9292E076), SPH_C64(0x0C898F8C8F8F8983),
+	SPH_C64(0x0805010301010509), SPH_C64(0xE8691D271D1D69F5),
+	SPH_C64(0xA20253F5535302F1), SPH_C64(0xEDC63E423E3EC6D3),
+	SPH_C64(0xF22059EB595920AB), SPH_C64(0x46E2C15EC1C1E287),
+	SPH_C64(0x426E4FD14F4F6E0D), SPH_C64(0x8DFA32563232FABF),
+	SPH_C64(0xB04E163A16164EA6), SPH_C64(0x8335FA13FAFA3579),
+	SPH_C64(0x87B9749C7474B9F3), SPH_C64(0x8B30FB10FBFB3070),
+	SPH_C64(0x3FF263A56363F25C), SPH_C64(0x8CD99FBC9F9FD913),
+	SPH_C64(0xBDE4345C3434E489), SPH_C64(0xD0721A2E1A1A72CA),
+	SPH_C64(0x4D822A7E2A2A8267), SPH_C64(0xEA2F5AEE5A5A2FB0),
+	SPH_C64(0x1C838D8A8D8D8391), SPH_C64(0x06CAC946C9C9CACF),
+	SPH_C64(0x36D4CF4CCFCFD4F9), SPH_C64(0xE309F607F6F60915),
+	SPH_C64(0xF4EA90AD9090EA64), SPH_C64(0x5D88287828288875),
+	SPH_C64(0x34928885888892BC), SPH_C64(0xACCD9BB09B9BCD37),
+	SPH_C64(0x95F531533131F5A4), SPH_C64(0x70360E120E0E367E),
+	SPH_C64(0x8173BDDABDBD733C), SPH_C64(0x6A7F4ADE4A4A7F20),
+	SPH_C64(0x136FE825E8E86FFB), SPH_C64(0xC4F496A79696F452),
+	SPH_C64(0x5904A6F7A6A604FF), SPH_C64(0x603C0C140C0C3C6C),
+	SPH_C64(0x0ECFC845C8C8CFC6), SPH_C64(0xEF80798B79798096),
+	SPH_C64(0x8976BCD9BCBC7635), SPH_C64(0x997CBEDFBEBE7C27),
+	SPH_C64(0x2B74EF2CEFEF74C4), SPH_C64(0x57CB6EB26E6ECB39),
+	SPH_C64(0x0A4346CA4646434C), SPH_C64(0xCCF197A49797F15B),
+	SPH_C64(0xE22A5BED5B5B2AB9), SPH_C64(0x3B7EED2AEDED7ED6),
+	SPH_C64(0xC87D192B19197DD1), SPH_C64(0x869AD976D9D99A5F),
+	SPH_C64(0x0926ACE9ACAC26A5), SPH_C64(0xBCC799B69999C725),
+	SPH_C64(0x2932A8E5A8A83281), SPH_C64(0x558D297B29298D7C),
+	SPH_C64(0x07E964AC6464E963), SPH_C64(0xF8631F211F1F63E7),
+	SPH_C64(0x0123ADEAADAD23AC), SPH_C64(0x921C55FF55551CC7),
+	SPH_C64(0x985F133513135F8B), SPH_C64(0xB16DBBD0BBBB6D0A),
+	SPH_C64(0xEB0CF704F7F70C1C), SPH_C64(0x5FCE6FB16F6FCE30),
+	SPH_C64(0xA167B9D6B9B96718), SPH_C64(0x024647C947474645),
+	SPH_C64(0x65932F712F2F934A), SPH_C64(0x2371EE2FEEEE71CD),
+	SPH_C64(0xA962B8D5B8B86211), SPH_C64(0xFF8A7B8D7B7B8A84),
+	SPH_C64(0x3C978986898997B5), SPH_C64(0x9DF030503030F0AD),
+	SPH_C64(0xD6B8D368D3D3B805), SPH_C64(0xDF9E7F817F7F9EA0),
+	SPH_C64(0x97B3769A7676B3E1), SPH_C64(0x64B0829B8282B0E6)
+};
+
+static const sph_u64 old0_T3[256] = { /* entry i is old0_T0[i] rotated left by 24 bits (spot-checked); built only when SPH_SMALL_FOOTPRINT_WHIRLPOOL is false */
+	SPH_C64(0xD568B86868D50F67), SPH_C64(0xB7D06DD0D0B71ECE),
+	SPH_C64(0x60EB20EBEB60E00B), SPH_C64(0x872B7D2B2B876E45),
+	SPH_C64(0x7548D8484875327A), SPH_C64(0xD39DBA9D9DD3019C),
+	SPH_C64(0xDF6ABE6A6ADF1D77), SPH_C64(0x53E431E4E4539773),
+	SPH_C64(0x48E338E3E348A84B), SPH_C64(0x15A3F8A3A315D271),
+	SPH_C64(0x1356FA565613DC8A), SPH_C64(0xBF819E8181BFFD7C),
+	SPH_C64(0x947D877D7D94B2CF), SPH_C64(0x12F10EF1F1122ADB),
+	SPH_C64(0xAB85928585ABD95C), SPH_C64(0xDC9EBF9E9EDC1A84),
+	SPH_C64(0x9C2C742C2C9C517D), SPH_C64(0x8C8E8F8E8E8C8A04),
+	SPH_C64(0x8578887878859FE7), SPH_C64(0xC5CA43CACAC5D41E),
+	SPH_C64(0x4B173917174BAFB8), SPH_C64(0x37A9E6A9A9378821),
+	SPH_C64(0xF861A36161F84E2F), SPH_C64(0xA6D562D5D5A633E6),
+	SPH_C64(0x345DE75D5D348FD2), SPH_C64(0x270B1D0B0B275358),
+	SPH_C64(0x868C898C8C869814), SPH_C64(0xCC3C443C3CCCC1FD),
+	SPH_C64(0xB677997777B6E89F), SPH_C64(0x0851F3515108E3B2),
+	SPH_C64(0xAA22662222AA2F0D), SPH_C64(0x5742C6424257682A),
+	SPH_C64(0xC33F413F3FC3DAE5), SPH_C64(0x1954FC545419CE9A),
+	SPH_C64(0x5841C34141587332), SPH_C64(0xBA809D8080BAF474),
+	SPH_C64(0xDBCC49CCCCDBE22E), SPH_C64(0xA486978686A4C244),
+	SPH_C64(0x45B3C8B3B34542F1), SPH_C64(0x781828181878D8C0),
+	SPH_C64(0x962E722E2E96436D), SPH_C64(0x1657F9575716D582),
+	SPH_C64(0x1E060A06061E3630), SPH_C64(0xF762A66262F75537),
+	SPH_C64(0x03F401F4F40307F3), SPH_C64(0xEE365A3636EE9BAD),
+	SPH_C64(0xB2D16ED1D1B217C6), SPH_C64(0xDA6BBD6B6BDA147F),
+	SPH_C64(0x771B2D1B1B77C3D8), SPH_C64(0xEC65AF6565EC6A0F),
+	SPH_C64(0xBC759F7575BCFA8F), SPH_C64(0x5010301010509080),
+	SPH_C64(0x95DA73DADA95449E), SPH_C64(0x7049DB4949703B72),
+	SPH_C64(0xBE266A2626BE0B2D), SPH_C64(0x3AF916F9F93A629B),
+	SPH_C64(0xC0CB40CBCBC0DD16), SPH_C64(0xE366AA6666E37117),
+	SPH_C64(0x5CE734E7E75C8C6B), SPH_C64(0x68BAD3BABA6803B9),
+	SPH_C64(0x2CAEEFAEAE2CB719), SPH_C64(0x0D50F050500DEABA),
+	SPH_C64(0x0752F6525207F8AA), SPH_C64(0x3DABE0ABAB3D9A31),
+	SPH_C64(0x11050F0505112D28), SPH_C64(0x17F00DF0F01723D3),
+	SPH_C64(0x390D170D0D396568), SPH_C64(0xA273957373A2CCBF),
+	SPH_C64(0xD73B4D3B3BD7FEC5), SPH_C64(0x14040C0404142420),
+	SPH_C64(0xA020602020A03D1D), SPH_C64(0x21FE1FFEFE215DA3),
+	SPH_C64(0x8EDD7ADDDD8E7BA6), SPH_C64(0x06F502F5F5060EFB),
+	SPH_C64(0x5EB4C1B4B45E7DC9), SPH_C64(0x3E5FE15F5F3E9DC2),
+	SPH_C64(0x220A1E0A0A225A50), SPH_C64(0x5BB5C2B5B55B74C1),
+	SPH_C64(0xE7C05DC0C0E78E4E), SPH_C64(0x1AA0FDA0A01AC969),
+	SPH_C64(0xA871937171A8DEAF), SPH_C64(0x0BA5F2A5A50BE441),
+	SPH_C64(0x992D772D2D995875), SPH_C64(0xFD60A06060FD4727),
+	SPH_C64(0xA772967272A7C5B7), SPH_C64(0xE593A89393E57FEC),
+	SPH_C64(0xDD394B3939DDECD5), SPH_C64(0x2808180808284840),
+	SPH_C64(0xB583988383B5EF6C), SPH_C64(0xA521632121A53415),
+	SPH_C64(0x315CE45C5C3186DA), SPH_C64(0xA187948787A1CB4C),
+	SPH_C64(0x4FB1CEB1B14F50E1), SPH_C64(0x47E03DE0E047B353),
+	SPH_C64(0x0000000000000000), SPH_C64(0xE8C358C3C3E89556),
+	SPH_C64(0x5A123612125A8290), SPH_C64(0xEF91AE9191EF6DFC),
+	SPH_C64(0x988A838A8A98AE24), SPH_C64(0x0A020602020A1210),
+	SPH_C64(0x6C1C241C1C6CFCE0), SPH_C64(0x59E637E6E6598563),
+	SPH_C64(0x4C45CF45454C5712), SPH_C64(0xEDC25BC2C2ED9C5E),
+	SPH_C64(0xF3C451C4C4F3AA6E), SPH_C64(0x2EFD1AFDFD2E46BB),
+	SPH_C64(0x79BFDCBFBF792E91), SPH_C64(0x4944CC4444495E1A),
+	SPH_C64(0x1FA1FEA1A11FC061), SPH_C64(0x614CD44C4C61165A),
+	SPH_C64(0xFF33553333FFB685), SPH_C64(0xF6C552C5C5F6A366),
+	SPH_C64(0xAE84918484AED054), SPH_C64(0xAF23652323AF2605),
+	SPH_C64(0x917C847C7C91BBC7), SPH_C64(0x4AB0CDB0B04A59E9),
+	SPH_C64(0xB1256F2525B11035), SPH_C64(0x41153F151541BDA8),
+	SPH_C64(0xE1355F3535E180B5), SPH_C64(0xD069BB6969D0066F),
+	SPH_C64(0x24FF1CFFFF2454AB), SPH_C64(0xFE94A19494FE40D4),
+	SPH_C64(0x644DD74D4D641F52), SPH_C64(0xAD70907070ADD7A7),
+	SPH_C64(0x10A2FBA2A210DB79), SPH_C64(0x29AFECAFAF29BE11),
+	SPH_C64(0xDECD4ACDCDDEEB26), SPH_C64(0xA9D667D6D6A928FE),
+	SPH_C64(0xC16CB46C6CC12B47), SPH_C64(0x51B7C4B7B75166D1),
+	SPH_C64(0x3FF815F8F83F6B93), SPH_C64(0x2D091B09092D4148),
+	SPH_C64(0x18F308F3F31838CB), SPH_C64(0xE667A96767E6781F),
+	SPH_C64(0x0EA4F1A4A40EED49), SPH_C64(0x65EA23EAEA65E903),
+	SPH_C64(0x7BEC29ECEC7BDF33), SPH_C64(0x54B6C7B6B6546FD9),
+	SPH_C64(0xA3D461D4D4A33AEE), SPH_C64(0xBDD26BD2D2BD0CDE),
+	SPH_C64(0x44143C141444B4A0), SPH_C64(0x661E221E1E66EEF0),
+	SPH_C64(0x42E13EE1E142BA5B), SPH_C64(0xB4246C2424B4193D),
+	SPH_C64(0xD838483838D8E5DD), SPH_C64(0xF9C657C6C6F9B87E),
+	SPH_C64(0x90DB70DBDB904D96), SPH_C64(0x7A4BDD4B4B7A2962),
+	SPH_C64(0x8F7A8E7A7A8F8DF7), SPH_C64(0xD23A4E3A3AD2F7CD),
+	SPH_C64(0x81DE7FDEDE8160BE), SPH_C64(0x3B5EE25E5E3B94CA),
+	SPH_C64(0x84DF7CDFDF8469B6), SPH_C64(0xFB95A29595FB49DC),
+	SPH_C64(0x2BFC19FCFC2B4FB3), SPH_C64(0x38AAE3AAAA389339),
+	SPH_C64(0xACD764D7D7AC21F6), SPH_C64(0xD1CE4FCECED1F03E),
+	SPH_C64(0x1B070907071B3F38), SPH_C64(0x330F110F0F337778),
+	SPH_C64(0xC93D473D3DC9C8F5), SPH_C64(0x2558E8585825A2FA),
+	SPH_C64(0xC89AB39A9AC83EA4), SPH_C64(0xC298B59898C22CB4),
+	SPH_C64(0xD69CB99C9CD60894), SPH_C64(0x1DF20BF2F21D31C3),
+	SPH_C64(0x01A7F4A7A701F651), SPH_C64(0x5511331111559988),
+	SPH_C64(0x9B7E827E7E9BA9D7), SPH_C64(0x9D8B808B8B9DA72C),
+	SPH_C64(0x5243C54343526122), SPH_C64(0x0F030503030F1B18),
+	SPH_C64(0x4DE23BE2E24DA143), SPH_C64(0x8BDC79DCDC8B72AE),
+	SPH_C64(0x56E532E5E5569E7B), SPH_C64(0x40B2CBB2B2404BF9),
+	SPH_C64(0x6B4ED24E4E6B044A), SPH_C64(0xFCC754C7C7FCB176),
+	SPH_C64(0xC46DB76D6DC4224F), SPH_C64(0x6AE926E9E96AF21B),
+	SPH_C64(0xBB27692727BB0225), SPH_C64(0x5D40C040405D7A3A),
+	SPH_C64(0x9FD875D8D89F568E), SPH_C64(0xEB37593737EB92A5),
+	SPH_C64(0xE092AB9292E076E4), SPH_C64(0x898F8C8F8F89830C),
+	SPH_C64(0x0501030101050908), SPH_C64(0x691D271D1D69F5E8),
+	SPH_C64(0x0253F5535302F1A2), SPH_C64(0xC63E423E3EC6D3ED),
+	SPH_C64(0x2059EB595920ABF2), SPH_C64(0xE2C15EC1C1E28746),
+	SPH_C64(0x6E4FD14F4F6E0D42), SPH_C64(0xFA32563232FABF8D),
+	SPH_C64(0x4E163A16164EA6B0), SPH_C64(0x35FA13FAFA357983),
+	SPH_C64(0xB9749C7474B9F387), SPH_C64(0x30FB10FBFB30708B),
+	SPH_C64(0xF263A56363F25C3F), SPH_C64(0xD99FBC9F9FD9138C),
+	SPH_C64(0xE4345C3434E489BD), SPH_C64(0x721A2E1A1A72CAD0),
+	SPH_C64(0x822A7E2A2A82674D), SPH_C64(0x2F5AEE5A5A2FB0EA),
+	SPH_C64(0x838D8A8D8D83911C), SPH_C64(0xCAC946C9C9CACF06),
+	SPH_C64(0xD4CF4CCFCFD4F936), SPH_C64(0x09F607F6F60915E3),
+	SPH_C64(0xEA90AD9090EA64F4), SPH_C64(0x882878282888755D),
+	SPH_C64(0x928885888892BC34), SPH_C64(0xCD9BB09B9BCD37AC),
+	SPH_C64(0xF531533131F5A495), SPH_C64(0x360E120E0E367E70),
+	SPH_C64(0x73BDDABDBD733C81), SPH_C64(0x7F4ADE4A4A7F206A),
+	SPH_C64(0x6FE825E8E86FFB13), SPH_C64(0xF496A79696F452C4),
+	SPH_C64(0x04A6F7A6A604FF59), SPH_C64(0x3C0C140C0C3C6C60),
+	SPH_C64(0xCFC845C8C8CFC60E), SPH_C64(0x80798B79798096EF),
+	SPH_C64(0x76BCD9BCBC763589), SPH_C64(0x7CBEDFBEBE7C2799),
+	SPH_C64(0x74EF2CEFEF74C42B), SPH_C64(0xCB6EB26E6ECB3957),
+	SPH_C64(0x4346CA4646434C0A), SPH_C64(0xF197A49797F15BCC),
+	SPH_C64(0x2A5BED5B5B2AB9E2), SPH_C64(0x7EED2AEDED7ED63B),
+	SPH_C64(0x7D192B19197DD1C8), SPH_C64(0x9AD976D9D99A5F86),
+	SPH_C64(0x26ACE9ACAC26A509), SPH_C64(0xC799B69999C725BC),
+	SPH_C64(0x32A8E5A8A8328129), SPH_C64(0x8D297B29298D7C55),
+	SPH_C64(0xE964AC6464E96307), SPH_C64(0x631F211F1F63E7F8),
+	SPH_C64(0x23ADEAADAD23AC01), SPH_C64(0x1C55FF55551CC792),
+	SPH_C64(0x5F133513135F8B98), SPH_C64(0x6DBBD0BBBB6D0AB1),
+	SPH_C64(0x0CF704F7F70C1CEB), SPH_C64(0xCE6FB16F6FCE305F),
+	SPH_C64(0x67B9D6B9B96718A1), SPH_C64(0x4647C94747464502),
+	SPH_C64(0x932F712F2F934A65), SPH_C64(0x71EE2FEEEE71CD23),
+	SPH_C64(0x62B8D5B8B86211A9), SPH_C64(0x8A7B8D7B7B8A84FF),
+	SPH_C64(0x978986898997B53C), SPH_C64(0xF030503030F0AD9D),
+	SPH_C64(0xB8D368D3D3B805D6), SPH_C64(0x9E7F817F7F9EA0DF),
+	SPH_C64(0xB3769A7676B3E197), SPH_C64(0xB0829B8282B0E664)
+};
+
+static const sph_u64 old0_T4[256] = {	/* 256-entry table of 64-bit words; spot checks show old0_T5/T6/T7[i] equal this entry rotated left by 8/16/24 bits */
+	SPH_C64(0x68B86868D50F67D5), SPH_C64(0xD06DD0D0B71ECEB7),
+	SPH_C64(0xEB20EBEB60E00B60), SPH_C64(0x2B7D2B2B876E4587),
+	SPH_C64(0x48D8484875327A75), SPH_C64(0x9DBA9D9DD3019CD3),
+	SPH_C64(0x6ABE6A6ADF1D77DF), SPH_C64(0xE431E4E453977353),
+	SPH_C64(0xE338E3E348A84B48), SPH_C64(0xA3F8A3A315D27115),
+	SPH_C64(0x56FA565613DC8A13), SPH_C64(0x819E8181BFFD7CBF),
+	SPH_C64(0x7D877D7D94B2CF94), SPH_C64(0xF10EF1F1122ADB12),
+	SPH_C64(0x85928585ABD95CAB), SPH_C64(0x9EBF9E9EDC1A84DC),
+	SPH_C64(0x2C742C2C9C517D9C), SPH_C64(0x8E8F8E8E8C8A048C),
+	SPH_C64(0x78887878859FE785), SPH_C64(0xCA43CACAC5D41EC5),
+	SPH_C64(0x173917174BAFB84B), SPH_C64(0xA9E6A9A937882137),
+	SPH_C64(0x61A36161F84E2FF8), SPH_C64(0xD562D5D5A633E6A6),
+	SPH_C64(0x5DE75D5D348FD234), SPH_C64(0x0B1D0B0B27535827),
+	SPH_C64(0x8C898C8C86981486), SPH_C64(0x3C443C3CCCC1FDCC),
+	SPH_C64(0x77997777B6E89FB6), SPH_C64(0x51F3515108E3B208),
+	SPH_C64(0x22662222AA2F0DAA), SPH_C64(0x42C6424257682A57),
+	SPH_C64(0x3F413F3FC3DAE5C3), SPH_C64(0x54FC545419CE9A19),
+	SPH_C64(0x41C3414158733258), SPH_C64(0x809D8080BAF474BA),
+	SPH_C64(0xCC49CCCCDBE22EDB), SPH_C64(0x86978686A4C244A4),
+	SPH_C64(0xB3C8B3B34542F145), SPH_C64(0x1828181878D8C078),
+	SPH_C64(0x2E722E2E96436D96), SPH_C64(0x57F9575716D58216),
+	SPH_C64(0x060A06061E36301E), SPH_C64(0x62A66262F75537F7),
+	SPH_C64(0xF401F4F40307F303), SPH_C64(0x365A3636EE9BADEE),
+	SPH_C64(0xD16ED1D1B217C6B2), SPH_C64(0x6BBD6B6BDA147FDA),
+	SPH_C64(0x1B2D1B1B77C3D877), SPH_C64(0x65AF6565EC6A0FEC),
+	SPH_C64(0x759F7575BCFA8FBC), SPH_C64(0x1030101050908050),
+	SPH_C64(0xDA73DADA95449E95), SPH_C64(0x49DB4949703B7270),
+	SPH_C64(0x266A2626BE0B2DBE), SPH_C64(0xF916F9F93A629B3A),
+	SPH_C64(0xCB40CBCBC0DD16C0), SPH_C64(0x66AA6666E37117E3),
+	SPH_C64(0xE734E7E75C8C6B5C), SPH_C64(0xBAD3BABA6803B968),
+	SPH_C64(0xAEEFAEAE2CB7192C), SPH_C64(0x50F050500DEABA0D),
+	SPH_C64(0x52F6525207F8AA07), SPH_C64(0xABE0ABAB3D9A313D),
+	SPH_C64(0x050F0505112D2811), SPH_C64(0xF00DF0F01723D317),
+	SPH_C64(0x0D170D0D39656839), SPH_C64(0x73957373A2CCBFA2),
+	SPH_C64(0x3B4D3B3BD7FEC5D7), SPH_C64(0x040C040414242014),
+	SPH_C64(0x20602020A03D1DA0), SPH_C64(0xFE1FFEFE215DA321),
+	SPH_C64(0xDD7ADDDD8E7BA68E), SPH_C64(0xF502F5F5060EFB06),
+	SPH_C64(0xB4C1B4B45E7DC95E), SPH_C64(0x5FE15F5F3E9DC23E),
+	SPH_C64(0x0A1E0A0A225A5022), SPH_C64(0xB5C2B5B55B74C15B),
+	SPH_C64(0xC05DC0C0E78E4EE7), SPH_C64(0xA0FDA0A01AC9691A),
+	SPH_C64(0x71937171A8DEAFA8), SPH_C64(0xA5F2A5A50BE4410B),
+	SPH_C64(0x2D772D2D99587599), SPH_C64(0x60A06060FD4727FD),
+	SPH_C64(0x72967272A7C5B7A7), SPH_C64(0x93A89393E57FECE5),
+	SPH_C64(0x394B3939DDECD5DD), SPH_C64(0x0818080828484028),
+	SPH_C64(0x83988383B5EF6CB5), SPH_C64(0x21632121A53415A5),
+	SPH_C64(0x5CE45C5C3186DA31), SPH_C64(0x87948787A1CB4CA1),
+	SPH_C64(0xB1CEB1B14F50E14F), SPH_C64(0xE03DE0E047B35347),
+	SPH_C64(0x0000000000000000), SPH_C64(0xC358C3C3E89556E8),
+	SPH_C64(0x123612125A82905A), SPH_C64(0x91AE9191EF6DFCEF),
+	SPH_C64(0x8A838A8A98AE2498), SPH_C64(0x020602020A12100A),
+	SPH_C64(0x1C241C1C6CFCE06C), SPH_C64(0xE637E6E659856359),
+	SPH_C64(0x45CF45454C57124C), SPH_C64(0xC25BC2C2ED9C5EED),
+	SPH_C64(0xC451C4C4F3AA6EF3), SPH_C64(0xFD1AFDFD2E46BB2E),
+	SPH_C64(0xBFDCBFBF792E9179), SPH_C64(0x44CC4444495E1A49),
+	SPH_C64(0xA1FEA1A11FC0611F), SPH_C64(0x4CD44C4C61165A61),
+	SPH_C64(0x33553333FFB685FF), SPH_C64(0xC552C5C5F6A366F6),
+	SPH_C64(0x84918484AED054AE), SPH_C64(0x23652323AF2605AF),
+	SPH_C64(0x7C847C7C91BBC791), SPH_C64(0xB0CDB0B04A59E94A),
+	SPH_C64(0x256F2525B11035B1), SPH_C64(0x153F151541BDA841),
+	SPH_C64(0x355F3535E180B5E1), SPH_C64(0x69BB6969D0066FD0),
+	SPH_C64(0xFF1CFFFF2454AB24), SPH_C64(0x94A19494FE40D4FE),
+	SPH_C64(0x4DD74D4D641F5264), SPH_C64(0x70907070ADD7A7AD),
+	SPH_C64(0xA2FBA2A210DB7910), SPH_C64(0xAFECAFAF29BE1129),
+	SPH_C64(0xCD4ACDCDDEEB26DE), SPH_C64(0xD667D6D6A928FEA9),
+	SPH_C64(0x6CB46C6CC12B47C1), SPH_C64(0xB7C4B7B75166D151),
+	SPH_C64(0xF815F8F83F6B933F), SPH_C64(0x091B09092D41482D),
+	SPH_C64(0xF308F3F31838CB18), SPH_C64(0x67A96767E6781FE6),
+	SPH_C64(0xA4F1A4A40EED490E), SPH_C64(0xEA23EAEA65E90365),
+	SPH_C64(0xEC29ECEC7BDF337B), SPH_C64(0xB6C7B6B6546FD954),
+	SPH_C64(0xD461D4D4A33AEEA3), SPH_C64(0xD26BD2D2BD0CDEBD),
+	SPH_C64(0x143C141444B4A044), SPH_C64(0x1E221E1E66EEF066),
+	SPH_C64(0xE13EE1E142BA5B42), SPH_C64(0x246C2424B4193DB4),
+	SPH_C64(0x38483838D8E5DDD8), SPH_C64(0xC657C6C6F9B87EF9),
+	SPH_C64(0xDB70DBDB904D9690), SPH_C64(0x4BDD4B4B7A29627A),
+	SPH_C64(0x7A8E7A7A8F8DF78F), SPH_C64(0x3A4E3A3AD2F7CDD2),
+	SPH_C64(0xDE7FDEDE8160BE81), SPH_C64(0x5EE25E5E3B94CA3B),
+	SPH_C64(0xDF7CDFDF8469B684), SPH_C64(0x95A29595FB49DCFB),
+	SPH_C64(0xFC19FCFC2B4FB32B), SPH_C64(0xAAE3AAAA38933938),
+	SPH_C64(0xD764D7D7AC21F6AC), SPH_C64(0xCE4FCECED1F03ED1),
+	SPH_C64(0x070907071B3F381B), SPH_C64(0x0F110F0F33777833),
+	SPH_C64(0x3D473D3DC9C8F5C9), SPH_C64(0x58E8585825A2FA25),
+	SPH_C64(0x9AB39A9AC83EA4C8), SPH_C64(0x98B59898C22CB4C2),
+	SPH_C64(0x9CB99C9CD60894D6), SPH_C64(0xF20BF2F21D31C31D),
+	SPH_C64(0xA7F4A7A701F65101), SPH_C64(0x1133111155998855),
+	SPH_C64(0x7E827E7E9BA9D79B), SPH_C64(0x8B808B8B9DA72C9D),
+	SPH_C64(0x43C5434352612252), SPH_C64(0x030503030F1B180F),
+	SPH_C64(0xE23BE2E24DA1434D), SPH_C64(0xDC79DCDC8B72AE8B),
+	SPH_C64(0xE532E5E5569E7B56), SPH_C64(0xB2CBB2B2404BF940),
+	SPH_C64(0x4ED24E4E6B044A6B), SPH_C64(0xC754C7C7FCB176FC),
+	SPH_C64(0x6DB76D6DC4224FC4), SPH_C64(0xE926E9E96AF21B6A),
+	SPH_C64(0x27692727BB0225BB), SPH_C64(0x40C040405D7A3A5D),
+	SPH_C64(0xD875D8D89F568E9F), SPH_C64(0x37593737EB92A5EB),
+	SPH_C64(0x92AB9292E076E4E0), SPH_C64(0x8F8C8F8F89830C89),
+	SPH_C64(0x0103010105090805), SPH_C64(0x1D271D1D69F5E869),
+	SPH_C64(0x53F5535302F1A202), SPH_C64(0x3E423E3EC6D3EDC6),
+	SPH_C64(0x59EB595920ABF220), SPH_C64(0xC15EC1C1E28746E2),
+	SPH_C64(0x4FD14F4F6E0D426E), SPH_C64(0x32563232FABF8DFA),
+	SPH_C64(0x163A16164EA6B04E), SPH_C64(0xFA13FAFA35798335),
+	SPH_C64(0x749C7474B9F387B9), SPH_C64(0xFB10FBFB30708B30),
+	SPH_C64(0x63A56363F25C3FF2), SPH_C64(0x9FBC9F9FD9138CD9),
+	SPH_C64(0x345C3434E489BDE4), SPH_C64(0x1A2E1A1A72CAD072),
+	SPH_C64(0x2A7E2A2A82674D82), SPH_C64(0x5AEE5A5A2FB0EA2F),
+	SPH_C64(0x8D8A8D8D83911C83), SPH_C64(0xC946C9C9CACF06CA),
+	SPH_C64(0xCF4CCFCFD4F936D4), SPH_C64(0xF607F6F60915E309),
+	SPH_C64(0x90AD9090EA64F4EA), SPH_C64(0x2878282888755D88),
+	SPH_C64(0x8885888892BC3492), SPH_C64(0x9BB09B9BCD37ACCD),
+	SPH_C64(0x31533131F5A495F5), SPH_C64(0x0E120E0E367E7036),
+	SPH_C64(0xBDDABDBD733C8173), SPH_C64(0x4ADE4A4A7F206A7F),
+	SPH_C64(0xE825E8E86FFB136F), SPH_C64(0x96A79696F452C4F4),
+	SPH_C64(0xA6F7A6A604FF5904), SPH_C64(0x0C140C0C3C6C603C),
+	SPH_C64(0xC845C8C8CFC60ECF), SPH_C64(0x798B79798096EF80),
+	SPH_C64(0xBCD9BCBC76358976), SPH_C64(0xBEDFBEBE7C27997C),
+	SPH_C64(0xEF2CEFEF74C42B74), SPH_C64(0x6EB26E6ECB3957CB),
+	SPH_C64(0x46CA4646434C0A43), SPH_C64(0x97A49797F15BCCF1),
+	SPH_C64(0x5BED5B5B2AB9E22A), SPH_C64(0xED2AEDED7ED63B7E),
+	SPH_C64(0x192B19197DD1C87D), SPH_C64(0xD976D9D99A5F869A),
+	SPH_C64(0xACE9ACAC26A50926), SPH_C64(0x99B69999C725BCC7),
+	SPH_C64(0xA8E5A8A832812932), SPH_C64(0x297B29298D7C558D),
+	SPH_C64(0x64AC6464E96307E9), SPH_C64(0x1F211F1F63E7F863),
+	SPH_C64(0xADEAADAD23AC0123), SPH_C64(0x55FF55551CC7921C),
+	SPH_C64(0x133513135F8B985F), SPH_C64(0xBBD0BBBB6D0AB16D),
+	SPH_C64(0xF704F7F70C1CEB0C), SPH_C64(0x6FB16F6FCE305FCE),
+	SPH_C64(0xB9D6B9B96718A167), SPH_C64(0x47C9474746450246),
+	SPH_C64(0x2F712F2F934A6593), SPH_C64(0xEE2FEEEE71CD2371),
+	SPH_C64(0xB8D5B8B86211A962), SPH_C64(0x7B8D7B7B8A84FF8A),
+	SPH_C64(0x8986898997B53C97), SPH_C64(0x30503030F0AD9DF0),
+	SPH_C64(0xD368D3D3B805D6B8), SPH_C64(0x7F817F7F9EA0DF9E),
+	SPH_C64(0x769A7676B3E197B3), SPH_C64(0x829B8282B0E664B0)
+};
+
+static const sph_u64 old0_T5[256] = {	/* 256-entry table; each entry appears to be old0_T4[i] rotated left by 8 bits (spot-checked on first/last rows -- TODO confirm for all entries) */
+	SPH_C64(0xB86868D50F67D568), SPH_C64(0x6DD0D0B71ECEB7D0),
+	SPH_C64(0x20EBEB60E00B60EB), SPH_C64(0x7D2B2B876E45872B),
+	SPH_C64(0xD8484875327A7548), SPH_C64(0xBA9D9DD3019CD39D),
+	SPH_C64(0xBE6A6ADF1D77DF6A), SPH_C64(0x31E4E453977353E4),
+	SPH_C64(0x38E3E348A84B48E3), SPH_C64(0xF8A3A315D27115A3),
+	SPH_C64(0xFA565613DC8A1356), SPH_C64(0x9E8181BFFD7CBF81),
+	SPH_C64(0x877D7D94B2CF947D), SPH_C64(0x0EF1F1122ADB12F1),
+	SPH_C64(0x928585ABD95CAB85), SPH_C64(0xBF9E9EDC1A84DC9E),
+	SPH_C64(0x742C2C9C517D9C2C), SPH_C64(0x8F8E8E8C8A048C8E),
+	SPH_C64(0x887878859FE78578), SPH_C64(0x43CACAC5D41EC5CA),
+	SPH_C64(0x3917174BAFB84B17), SPH_C64(0xE6A9A937882137A9),
+	SPH_C64(0xA36161F84E2FF861), SPH_C64(0x62D5D5A633E6A6D5),
+	SPH_C64(0xE75D5D348FD2345D), SPH_C64(0x1D0B0B275358270B),
+	SPH_C64(0x898C8C869814868C), SPH_C64(0x443C3CCCC1FDCC3C),
+	SPH_C64(0x997777B6E89FB677), SPH_C64(0xF3515108E3B20851),
+	SPH_C64(0x662222AA2F0DAA22), SPH_C64(0xC6424257682A5742),
+	SPH_C64(0x413F3FC3DAE5C33F), SPH_C64(0xFC545419CE9A1954),
+	SPH_C64(0xC341415873325841), SPH_C64(0x9D8080BAF474BA80),
+	SPH_C64(0x49CCCCDBE22EDBCC), SPH_C64(0x978686A4C244A486),
+	SPH_C64(0xC8B3B34542F145B3), SPH_C64(0x28181878D8C07818),
+	SPH_C64(0x722E2E96436D962E), SPH_C64(0xF9575716D5821657),
+	SPH_C64(0x0A06061E36301E06), SPH_C64(0xA66262F75537F762),
+	SPH_C64(0x01F4F40307F303F4), SPH_C64(0x5A3636EE9BADEE36),
+	SPH_C64(0x6ED1D1B217C6B2D1), SPH_C64(0xBD6B6BDA147FDA6B),
+	SPH_C64(0x2D1B1B77C3D8771B), SPH_C64(0xAF6565EC6A0FEC65),
+	SPH_C64(0x9F7575BCFA8FBC75), SPH_C64(0x3010105090805010),
+	SPH_C64(0x73DADA95449E95DA), SPH_C64(0xDB4949703B727049),
+	SPH_C64(0x6A2626BE0B2DBE26), SPH_C64(0x16F9F93A629B3AF9),
+	SPH_C64(0x40CBCBC0DD16C0CB), SPH_C64(0xAA6666E37117E366),
+	SPH_C64(0x34E7E75C8C6B5CE7), SPH_C64(0xD3BABA6803B968BA),
+	SPH_C64(0xEFAEAE2CB7192CAE), SPH_C64(0xF050500DEABA0D50),
+	SPH_C64(0xF6525207F8AA0752), SPH_C64(0xE0ABAB3D9A313DAB),
+	SPH_C64(0x0F0505112D281105), SPH_C64(0x0DF0F01723D317F0),
+	SPH_C64(0x170D0D396568390D), SPH_C64(0x957373A2CCBFA273),
+	SPH_C64(0x4D3B3BD7FEC5D73B), SPH_C64(0x0C04041424201404),
+	SPH_C64(0x602020A03D1DA020), SPH_C64(0x1FFEFE215DA321FE),
+	SPH_C64(0x7ADDDD8E7BA68EDD), SPH_C64(0x02F5F5060EFB06F5),
+	SPH_C64(0xC1B4B45E7DC95EB4), SPH_C64(0xE15F5F3E9DC23E5F),
+	SPH_C64(0x1E0A0A225A50220A), SPH_C64(0xC2B5B55B74C15BB5),
+	SPH_C64(0x5DC0C0E78E4EE7C0), SPH_C64(0xFDA0A01AC9691AA0),
+	SPH_C64(0x937171A8DEAFA871), SPH_C64(0xF2A5A50BE4410BA5),
+	SPH_C64(0x772D2D995875992D), SPH_C64(0xA06060FD4727FD60),
+	SPH_C64(0x967272A7C5B7A772), SPH_C64(0xA89393E57FECE593),
+	SPH_C64(0x4B3939DDECD5DD39), SPH_C64(0x1808082848402808),
+	SPH_C64(0x988383B5EF6CB583), SPH_C64(0x632121A53415A521),
+	SPH_C64(0xE45C5C3186DA315C), SPH_C64(0x948787A1CB4CA187),
+	SPH_C64(0xCEB1B14F50E14FB1), SPH_C64(0x3DE0E047B35347E0),
+	SPH_C64(0x0000000000000000), SPH_C64(0x58C3C3E89556E8C3),
+	SPH_C64(0x3612125A82905A12), SPH_C64(0xAE9191EF6DFCEF91),
+	SPH_C64(0x838A8A98AE24988A), SPH_C64(0x0602020A12100A02),
+	SPH_C64(0x241C1C6CFCE06C1C), SPH_C64(0x37E6E659856359E6),
+	SPH_C64(0xCF45454C57124C45), SPH_C64(0x5BC2C2ED9C5EEDC2),
+	SPH_C64(0x51C4C4F3AA6EF3C4), SPH_C64(0x1AFDFD2E46BB2EFD),
+	SPH_C64(0xDCBFBF792E9179BF), SPH_C64(0xCC4444495E1A4944),
+	SPH_C64(0xFEA1A11FC0611FA1), SPH_C64(0xD44C4C61165A614C),
+	SPH_C64(0x553333FFB685FF33), SPH_C64(0x52C5C5F6A366F6C5),
+	SPH_C64(0x918484AED054AE84), SPH_C64(0x652323AF2605AF23),
+	SPH_C64(0x847C7C91BBC7917C), SPH_C64(0xCDB0B04A59E94AB0),
+	SPH_C64(0x6F2525B11035B125), SPH_C64(0x3F151541BDA84115),
+	SPH_C64(0x5F3535E180B5E135), SPH_C64(0xBB6969D0066FD069),
+	SPH_C64(0x1CFFFF2454AB24FF), SPH_C64(0xA19494FE40D4FE94),
+	SPH_C64(0xD74D4D641F52644D), SPH_C64(0x907070ADD7A7AD70),
+	SPH_C64(0xFBA2A210DB7910A2), SPH_C64(0xECAFAF29BE1129AF),
+	SPH_C64(0x4ACDCDDEEB26DECD), SPH_C64(0x67D6D6A928FEA9D6),
+	SPH_C64(0xB46C6CC12B47C16C), SPH_C64(0xC4B7B75166D151B7),
+	SPH_C64(0x15F8F83F6B933FF8), SPH_C64(0x1B09092D41482D09),
+	SPH_C64(0x08F3F31838CB18F3), SPH_C64(0xA96767E6781FE667),
+	SPH_C64(0xF1A4A40EED490EA4), SPH_C64(0x23EAEA65E90365EA),
+	SPH_C64(0x29ECEC7BDF337BEC), SPH_C64(0xC7B6B6546FD954B6),
+	SPH_C64(0x61D4D4A33AEEA3D4), SPH_C64(0x6BD2D2BD0CDEBDD2),
+	SPH_C64(0x3C141444B4A04414), SPH_C64(0x221E1E66EEF0661E),
+	SPH_C64(0x3EE1E142BA5B42E1), SPH_C64(0x6C2424B4193DB424),
+	SPH_C64(0x483838D8E5DDD838), SPH_C64(0x57C6C6F9B87EF9C6),
+	SPH_C64(0x70DBDB904D9690DB), SPH_C64(0xDD4B4B7A29627A4B),
+	SPH_C64(0x8E7A7A8F8DF78F7A), SPH_C64(0x4E3A3AD2F7CDD23A),
+	SPH_C64(0x7FDEDE8160BE81DE), SPH_C64(0xE25E5E3B94CA3B5E),
+	SPH_C64(0x7CDFDF8469B684DF), SPH_C64(0xA29595FB49DCFB95),
+	SPH_C64(0x19FCFC2B4FB32BFC), SPH_C64(0xE3AAAA38933938AA),
+	SPH_C64(0x64D7D7AC21F6ACD7), SPH_C64(0x4FCECED1F03ED1CE),
+	SPH_C64(0x0907071B3F381B07), SPH_C64(0x110F0F337778330F),
+	SPH_C64(0x473D3DC9C8F5C93D), SPH_C64(0xE8585825A2FA2558),
+	SPH_C64(0xB39A9AC83EA4C89A), SPH_C64(0xB59898C22CB4C298),
+	SPH_C64(0xB99C9CD60894D69C), SPH_C64(0x0BF2F21D31C31DF2),
+	SPH_C64(0xF4A7A701F65101A7), SPH_C64(0x3311115599885511),
+	SPH_C64(0x827E7E9BA9D79B7E), SPH_C64(0x808B8B9DA72C9D8B),
+	SPH_C64(0xC543435261225243), SPH_C64(0x0503030F1B180F03),
+	SPH_C64(0x3BE2E24DA1434DE2), SPH_C64(0x79DCDC8B72AE8BDC),
+	SPH_C64(0x32E5E5569E7B56E5), SPH_C64(0xCBB2B2404BF940B2),
+	SPH_C64(0xD24E4E6B044A6B4E), SPH_C64(0x54C7C7FCB176FCC7),
+	SPH_C64(0xB76D6DC4224FC46D), SPH_C64(0x26E9E96AF21B6AE9),
+	SPH_C64(0x692727BB0225BB27), SPH_C64(0xC040405D7A3A5D40),
+	SPH_C64(0x75D8D89F568E9FD8), SPH_C64(0x593737EB92A5EB37),
+	SPH_C64(0xAB9292E076E4E092), SPH_C64(0x8C8F8F89830C898F),
+	SPH_C64(0x0301010509080501), SPH_C64(0x271D1D69F5E8691D),
+	SPH_C64(0xF5535302F1A20253), SPH_C64(0x423E3EC6D3EDC63E),
+	SPH_C64(0xEB595920ABF22059), SPH_C64(0x5EC1C1E28746E2C1),
+	SPH_C64(0xD14F4F6E0D426E4F), SPH_C64(0x563232FABF8DFA32),
+	SPH_C64(0x3A16164EA6B04E16), SPH_C64(0x13FAFA35798335FA),
+	SPH_C64(0x9C7474B9F387B974), SPH_C64(0x10FBFB30708B30FB),
+	SPH_C64(0xA56363F25C3FF263), SPH_C64(0xBC9F9FD9138CD99F),
+	SPH_C64(0x5C3434E489BDE434), SPH_C64(0x2E1A1A72CAD0721A),
+	SPH_C64(0x7E2A2A82674D822A), SPH_C64(0xEE5A5A2FB0EA2F5A),
+	SPH_C64(0x8A8D8D83911C838D), SPH_C64(0x46C9C9CACF06CAC9),
+	SPH_C64(0x4CCFCFD4F936D4CF), SPH_C64(0x07F6F60915E309F6),
+	SPH_C64(0xAD9090EA64F4EA90), SPH_C64(0x78282888755D8828),
+	SPH_C64(0x85888892BC349288), SPH_C64(0xB09B9BCD37ACCD9B),
+	SPH_C64(0x533131F5A495F531), SPH_C64(0x120E0E367E70360E),
+	SPH_C64(0xDABDBD733C8173BD), SPH_C64(0xDE4A4A7F206A7F4A),
+	SPH_C64(0x25E8E86FFB136FE8), SPH_C64(0xA79696F452C4F496),
+	SPH_C64(0xF7A6A604FF5904A6), SPH_C64(0x140C0C3C6C603C0C),
+	SPH_C64(0x45C8C8CFC60ECFC8), SPH_C64(0x8B79798096EF8079),
+	SPH_C64(0xD9BCBC76358976BC), SPH_C64(0xDFBEBE7C27997CBE),
+	SPH_C64(0x2CEFEF74C42B74EF), SPH_C64(0xB26E6ECB3957CB6E),
+	SPH_C64(0xCA4646434C0A4346), SPH_C64(0xA49797F15BCCF197),
+	SPH_C64(0xED5B5B2AB9E22A5B), SPH_C64(0x2AEDED7ED63B7EED),
+	SPH_C64(0x2B19197DD1C87D19), SPH_C64(0x76D9D99A5F869AD9),
+	SPH_C64(0xE9ACAC26A50926AC), SPH_C64(0xB69999C725BCC799),
+	SPH_C64(0xE5A8A832812932A8), SPH_C64(0x7B29298D7C558D29),
+	SPH_C64(0xAC6464E96307E964), SPH_C64(0x211F1F63E7F8631F),
+	SPH_C64(0xEAADAD23AC0123AD), SPH_C64(0xFF55551CC7921C55),
+	SPH_C64(0x3513135F8B985F13), SPH_C64(0xD0BBBB6D0AB16DBB),
+	SPH_C64(0x04F7F70C1CEB0CF7), SPH_C64(0xB16F6FCE305FCE6F),
+	SPH_C64(0xD6B9B96718A167B9), SPH_C64(0xC947474645024647),
+	SPH_C64(0x712F2F934A65932F), SPH_C64(0x2FEEEE71CD2371EE),
+	SPH_C64(0xD5B8B86211A962B8), SPH_C64(0x8D7B7B8A84FF8A7B),
+	SPH_C64(0x86898997B53C9789), SPH_C64(0x503030F0AD9DF030),
+	SPH_C64(0x68D3D3B805D6B8D3), SPH_C64(0x817F7F9EA0DF9E7F),
+	SPH_C64(0x9A7676B3E197B376), SPH_C64(0x9B8282B0E664B082)
+};
+
+static const sph_u64 old0_T6[256] = {	/* 256-entry table; each entry appears to be old0_T4[i] rotated left by 16 bits (spot-checked on first/last rows -- TODO confirm for all entries) */
+	SPH_C64(0x6868D50F67D568B8), SPH_C64(0xD0D0B71ECEB7D06D),
+	SPH_C64(0xEBEB60E00B60EB20), SPH_C64(0x2B2B876E45872B7D),
+	SPH_C64(0x484875327A7548D8), SPH_C64(0x9D9DD3019CD39DBA),
+	SPH_C64(0x6A6ADF1D77DF6ABE), SPH_C64(0xE4E453977353E431),
+	SPH_C64(0xE3E348A84B48E338), SPH_C64(0xA3A315D27115A3F8),
+	SPH_C64(0x565613DC8A1356FA), SPH_C64(0x8181BFFD7CBF819E),
+	SPH_C64(0x7D7D94B2CF947D87), SPH_C64(0xF1F1122ADB12F10E),
+	SPH_C64(0x8585ABD95CAB8592), SPH_C64(0x9E9EDC1A84DC9EBF),
+	SPH_C64(0x2C2C9C517D9C2C74), SPH_C64(0x8E8E8C8A048C8E8F),
+	SPH_C64(0x7878859FE7857888), SPH_C64(0xCACAC5D41EC5CA43),
+	SPH_C64(0x17174BAFB84B1739), SPH_C64(0xA9A937882137A9E6),
+	SPH_C64(0x6161F84E2FF861A3), SPH_C64(0xD5D5A633E6A6D562),
+	SPH_C64(0x5D5D348FD2345DE7), SPH_C64(0x0B0B275358270B1D),
+	SPH_C64(0x8C8C869814868C89), SPH_C64(0x3C3CCCC1FDCC3C44),
+	SPH_C64(0x7777B6E89FB67799), SPH_C64(0x515108E3B20851F3),
+	SPH_C64(0x2222AA2F0DAA2266), SPH_C64(0x424257682A5742C6),
+	SPH_C64(0x3F3FC3DAE5C33F41), SPH_C64(0x545419CE9A1954FC),
+	SPH_C64(0x41415873325841C3), SPH_C64(0x8080BAF474BA809D),
+	SPH_C64(0xCCCCDBE22EDBCC49), SPH_C64(0x8686A4C244A48697),
+	SPH_C64(0xB3B34542F145B3C8), SPH_C64(0x181878D8C0781828),
+	SPH_C64(0x2E2E96436D962E72), SPH_C64(0x575716D5821657F9),
+	SPH_C64(0x06061E36301E060A), SPH_C64(0x6262F75537F762A6),
+	SPH_C64(0xF4F40307F303F401), SPH_C64(0x3636EE9BADEE365A),
+	SPH_C64(0xD1D1B217C6B2D16E), SPH_C64(0x6B6BDA147FDA6BBD),
+	SPH_C64(0x1B1B77C3D8771B2D), SPH_C64(0x6565EC6A0FEC65AF),
+	SPH_C64(0x7575BCFA8FBC759F), SPH_C64(0x1010509080501030),
+	SPH_C64(0xDADA95449E95DA73), SPH_C64(0x4949703B727049DB),
+	SPH_C64(0x2626BE0B2DBE266A), SPH_C64(0xF9F93A629B3AF916),
+	SPH_C64(0xCBCBC0DD16C0CB40), SPH_C64(0x6666E37117E366AA),
+	SPH_C64(0xE7E75C8C6B5CE734), SPH_C64(0xBABA6803B968BAD3),
+	SPH_C64(0xAEAE2CB7192CAEEF), SPH_C64(0x50500DEABA0D50F0),
+	SPH_C64(0x525207F8AA0752F6), SPH_C64(0xABAB3D9A313DABE0),
+	SPH_C64(0x0505112D2811050F), SPH_C64(0xF0F01723D317F00D),
+	SPH_C64(0x0D0D396568390D17), SPH_C64(0x7373A2CCBFA27395),
+	SPH_C64(0x3B3BD7FEC5D73B4D), SPH_C64(0x040414242014040C),
+	SPH_C64(0x2020A03D1DA02060), SPH_C64(0xFEFE215DA321FE1F),
+	SPH_C64(0xDDDD8E7BA68EDD7A), SPH_C64(0xF5F5060EFB06F502),
+	SPH_C64(0xB4B45E7DC95EB4C1), SPH_C64(0x5F5F3E9DC23E5FE1),
+	SPH_C64(0x0A0A225A50220A1E), SPH_C64(0xB5B55B74C15BB5C2),
+	SPH_C64(0xC0C0E78E4EE7C05D), SPH_C64(0xA0A01AC9691AA0FD),
+	SPH_C64(0x7171A8DEAFA87193), SPH_C64(0xA5A50BE4410BA5F2),
+	SPH_C64(0x2D2D995875992D77), SPH_C64(0x6060FD4727FD60A0),
+	SPH_C64(0x7272A7C5B7A77296), SPH_C64(0x9393E57FECE593A8),
+	SPH_C64(0x3939DDECD5DD394B), SPH_C64(0x0808284840280818),
+	SPH_C64(0x8383B5EF6CB58398), SPH_C64(0x2121A53415A52163),
+	SPH_C64(0x5C5C3186DA315CE4), SPH_C64(0x8787A1CB4CA18794),
+	SPH_C64(0xB1B14F50E14FB1CE), SPH_C64(0xE0E047B35347E03D),
+	SPH_C64(0x0000000000000000), SPH_C64(0xC3C3E89556E8C358),
+	SPH_C64(0x12125A82905A1236), SPH_C64(0x9191EF6DFCEF91AE),
+	SPH_C64(0x8A8A98AE24988A83), SPH_C64(0x02020A12100A0206),
+	SPH_C64(0x1C1C6CFCE06C1C24), SPH_C64(0xE6E659856359E637),
+	SPH_C64(0x45454C57124C45CF), SPH_C64(0xC2C2ED9C5EEDC25B),
+	SPH_C64(0xC4C4F3AA6EF3C451), SPH_C64(0xFDFD2E46BB2EFD1A),
+	SPH_C64(0xBFBF792E9179BFDC), SPH_C64(0x4444495E1A4944CC),
+	SPH_C64(0xA1A11FC0611FA1FE), SPH_C64(0x4C4C61165A614CD4),
+	SPH_C64(0x3333FFB685FF3355), SPH_C64(0xC5C5F6A366F6C552),
+	SPH_C64(0x8484AED054AE8491), SPH_C64(0x2323AF2605AF2365),
+	SPH_C64(0x7C7C91BBC7917C84), SPH_C64(0xB0B04A59E94AB0CD),
+	SPH_C64(0x2525B11035B1256F), SPH_C64(0x151541BDA841153F),
+	SPH_C64(0x3535E180B5E1355F), SPH_C64(0x6969D0066FD069BB),
+	SPH_C64(0xFFFF2454AB24FF1C), SPH_C64(0x9494FE40D4FE94A1),
+	SPH_C64(0x4D4D641F52644DD7), SPH_C64(0x7070ADD7A7AD7090),
+	SPH_C64(0xA2A210DB7910A2FB), SPH_C64(0xAFAF29BE1129AFEC),
+	SPH_C64(0xCDCDDEEB26DECD4A), SPH_C64(0xD6D6A928FEA9D667),
+	SPH_C64(0x6C6CC12B47C16CB4), SPH_C64(0xB7B75166D151B7C4),
+	SPH_C64(0xF8F83F6B933FF815), SPH_C64(0x09092D41482D091B),
+	SPH_C64(0xF3F31838CB18F308), SPH_C64(0x6767E6781FE667A9),
+	SPH_C64(0xA4A40EED490EA4F1), SPH_C64(0xEAEA65E90365EA23),
+	SPH_C64(0xECEC7BDF337BEC29), SPH_C64(0xB6B6546FD954B6C7),
+	SPH_C64(0xD4D4A33AEEA3D461), SPH_C64(0xD2D2BD0CDEBDD26B),
+	SPH_C64(0x141444B4A044143C), SPH_C64(0x1E1E66EEF0661E22),
+	SPH_C64(0xE1E142BA5B42E13E), SPH_C64(0x2424B4193DB4246C),
+	SPH_C64(0x3838D8E5DDD83848), SPH_C64(0xC6C6F9B87EF9C657),
+	SPH_C64(0xDBDB904D9690DB70), SPH_C64(0x4B4B7A29627A4BDD),
+	SPH_C64(0x7A7A8F8DF78F7A8E), SPH_C64(0x3A3AD2F7CDD23A4E),
+	SPH_C64(0xDEDE8160BE81DE7F), SPH_C64(0x5E5E3B94CA3B5EE2),
+	SPH_C64(0xDFDF8469B684DF7C), SPH_C64(0x9595FB49DCFB95A2),
+	SPH_C64(0xFCFC2B4FB32BFC19), SPH_C64(0xAAAA38933938AAE3),
+	SPH_C64(0xD7D7AC21F6ACD764), SPH_C64(0xCECED1F03ED1CE4F),
+	SPH_C64(0x07071B3F381B0709), SPH_C64(0x0F0F337778330F11),
+	SPH_C64(0x3D3DC9C8F5C93D47), SPH_C64(0x585825A2FA2558E8),
+	SPH_C64(0x9A9AC83EA4C89AB3), SPH_C64(0x9898C22CB4C298B5),
+	SPH_C64(0x9C9CD60894D69CB9), SPH_C64(0xF2F21D31C31DF20B),
+	SPH_C64(0xA7A701F65101A7F4), SPH_C64(0x1111559988551133),
+	SPH_C64(0x7E7E9BA9D79B7E82), SPH_C64(0x8B8B9DA72C9D8B80),
+	SPH_C64(0x43435261225243C5), SPH_C64(0x03030F1B180F0305),
+	SPH_C64(0xE2E24DA1434DE23B), SPH_C64(0xDCDC8B72AE8BDC79),
+	SPH_C64(0xE5E5569E7B56E532), SPH_C64(0xB2B2404BF940B2CB),
+	SPH_C64(0x4E4E6B044A6B4ED2), SPH_C64(0xC7C7FCB176FCC754),
+	SPH_C64(0x6D6DC4224FC46DB7), SPH_C64(0xE9E96AF21B6AE926),
+	SPH_C64(0x2727BB0225BB2769), SPH_C64(0x40405D7A3A5D40C0),
+	SPH_C64(0xD8D89F568E9FD875), SPH_C64(0x3737EB92A5EB3759),
+	SPH_C64(0x9292E076E4E092AB), SPH_C64(0x8F8F89830C898F8C),
+	SPH_C64(0x0101050908050103), SPH_C64(0x1D1D69F5E8691D27),
+	SPH_C64(0x535302F1A20253F5), SPH_C64(0x3E3EC6D3EDC63E42),
+	SPH_C64(0x595920ABF22059EB), SPH_C64(0xC1C1E28746E2C15E),
+	SPH_C64(0x4F4F6E0D426E4FD1), SPH_C64(0x3232FABF8DFA3256),
+	SPH_C64(0x16164EA6B04E163A), SPH_C64(0xFAFA35798335FA13),
+	SPH_C64(0x7474B9F387B9749C), SPH_C64(0xFBFB30708B30FB10),
+	SPH_C64(0x6363F25C3FF263A5), SPH_C64(0x9F9FD9138CD99FBC),
+	SPH_C64(0x3434E489BDE4345C), SPH_C64(0x1A1A72CAD0721A2E),
+	SPH_C64(0x2A2A82674D822A7E), SPH_C64(0x5A5A2FB0EA2F5AEE),
+	SPH_C64(0x8D8D83911C838D8A), SPH_C64(0xC9C9CACF06CAC946),
+	SPH_C64(0xCFCFD4F936D4CF4C), SPH_C64(0xF6F60915E309F607),
+	SPH_C64(0x9090EA64F4EA90AD), SPH_C64(0x282888755D882878),
+	SPH_C64(0x888892BC34928885), SPH_C64(0x9B9BCD37ACCD9BB0),
+	SPH_C64(0x3131F5A495F53153), SPH_C64(0x0E0E367E70360E12),
+	SPH_C64(0xBDBD733C8173BDDA), SPH_C64(0x4A4A7F206A7F4ADE),
+	SPH_C64(0xE8E86FFB136FE825), SPH_C64(0x9696F452C4F496A7),
+	SPH_C64(0xA6A604FF5904A6F7), SPH_C64(0x0C0C3C6C603C0C14),
+	SPH_C64(0xC8C8CFC60ECFC845), SPH_C64(0x79798096EF80798B),
+	SPH_C64(0xBCBC76358976BCD9), SPH_C64(0xBEBE7C27997CBEDF),
+	SPH_C64(0xEFEF74C42B74EF2C), SPH_C64(0x6E6ECB3957CB6EB2),
+	SPH_C64(0x4646434C0A4346CA), SPH_C64(0x9797F15BCCF197A4),
+	SPH_C64(0x5B5B2AB9E22A5BED), SPH_C64(0xEDED7ED63B7EED2A),
+	SPH_C64(0x19197DD1C87D192B), SPH_C64(0xD9D99A5F869AD976),
+	SPH_C64(0xACAC26A50926ACE9), SPH_C64(0x9999C725BCC799B6),
+	SPH_C64(0xA8A832812932A8E5), SPH_C64(0x29298D7C558D297B),
+	SPH_C64(0x6464E96307E964AC), SPH_C64(0x1F1F63E7F8631F21),
+	SPH_C64(0xADAD23AC0123ADEA), SPH_C64(0x55551CC7921C55FF),
+	SPH_C64(0x13135F8B985F1335), SPH_C64(0xBBBB6D0AB16DBBD0),
+	SPH_C64(0xF7F70C1CEB0CF704), SPH_C64(0x6F6FCE305FCE6FB1),
+	SPH_C64(0xB9B96718A167B9D6), SPH_C64(0x47474645024647C9),
+	SPH_C64(0x2F2F934A65932F71), SPH_C64(0xEEEE71CD2371EE2F),
+	SPH_C64(0xB8B86211A962B8D5), SPH_C64(0x7B7B8A84FF8A7B8D),
+	SPH_C64(0x898997B53C978986), SPH_C64(0x3030F0AD9DF03050),
+	SPH_C64(0xD3D3B805D6B8D368), SPH_C64(0x7F7F9EA0DF9E7F81),
+	SPH_C64(0x7676B3E197B3769A), SPH_C64(0x8282B0E664B0829B)
+};
+
+static const sph_u64 old0_T7[256] = {	/* 256-entry table; each entry appears to be old0_T4[i] rotated left by 24 bits (spot-checked on first/last rows -- TODO confirm for all entries) */
+	SPH_C64(0x68D50F67D568B868), SPH_C64(0xD0B71ECEB7D06DD0),
+	SPH_C64(0xEB60E00B60EB20EB), SPH_C64(0x2B876E45872B7D2B),
+	SPH_C64(0x4875327A7548D848), SPH_C64(0x9DD3019CD39DBA9D),
+	SPH_C64(0x6ADF1D77DF6ABE6A), SPH_C64(0xE453977353E431E4),
+	SPH_C64(0xE348A84B48E338E3), SPH_C64(0xA315D27115A3F8A3),
+	SPH_C64(0x5613DC8A1356FA56), SPH_C64(0x81BFFD7CBF819E81),
+	SPH_C64(0x7D94B2CF947D877D), SPH_C64(0xF1122ADB12F10EF1),
+	SPH_C64(0x85ABD95CAB859285), SPH_C64(0x9EDC1A84DC9EBF9E),
+	SPH_C64(0x2C9C517D9C2C742C), SPH_C64(0x8E8C8A048C8E8F8E),
+	SPH_C64(0x78859FE785788878), SPH_C64(0xCAC5D41EC5CA43CA),
+	SPH_C64(0x174BAFB84B173917), SPH_C64(0xA937882137A9E6A9),
+	SPH_C64(0x61F84E2FF861A361), SPH_C64(0xD5A633E6A6D562D5),
+	SPH_C64(0x5D348FD2345DE75D), SPH_C64(0x0B275358270B1D0B),
+	SPH_C64(0x8C869814868C898C), SPH_C64(0x3CCCC1FDCC3C443C),
+	SPH_C64(0x77B6E89FB6779977), SPH_C64(0x5108E3B20851F351),
+	SPH_C64(0x22AA2F0DAA226622), SPH_C64(0x4257682A5742C642),
+	SPH_C64(0x3FC3DAE5C33F413F), SPH_C64(0x5419CE9A1954FC54),
+	SPH_C64(0x415873325841C341), SPH_C64(0x80BAF474BA809D80),
+	SPH_C64(0xCCDBE22EDBCC49CC), SPH_C64(0x86A4C244A4869786),
+	SPH_C64(0xB34542F145B3C8B3), SPH_C64(0x1878D8C078182818),
+	SPH_C64(0x2E96436D962E722E), SPH_C64(0x5716D5821657F957),
+	SPH_C64(0x061E36301E060A06), SPH_C64(0x62F75537F762A662),
+	SPH_C64(0xF40307F303F401F4), SPH_C64(0x36EE9BADEE365A36),
+	SPH_C64(0xD1B217C6B2D16ED1), SPH_C64(0x6BDA147FDA6BBD6B),
+	SPH_C64(0x1B77C3D8771B2D1B), SPH_C64(0x65EC6A0FEC65AF65),
+	SPH_C64(0x75BCFA8FBC759F75), SPH_C64(0x1050908050103010),
+	SPH_C64(0xDA95449E95DA73DA), SPH_C64(0x49703B727049DB49),
+	SPH_C64(0x26BE0B2DBE266A26), SPH_C64(0xF93A629B3AF916F9),
+	SPH_C64(0xCBC0DD16C0CB40CB), SPH_C64(0x66E37117E366AA66),
+	SPH_C64(0xE75C8C6B5CE734E7), SPH_C64(0xBA6803B968BAD3BA),
+	SPH_C64(0xAE2CB7192CAEEFAE), SPH_C64(0x500DEABA0D50F050),
+	SPH_C64(0x5207F8AA0752F652), SPH_C64(0xAB3D9A313DABE0AB),
+	SPH_C64(0x05112D2811050F05), SPH_C64(0xF01723D317F00DF0),
+	SPH_C64(0x0D396568390D170D), SPH_C64(0x73A2CCBFA2739573),
+	SPH_C64(0x3BD7FEC5D73B4D3B), SPH_C64(0x0414242014040C04),
+	SPH_C64(0x20A03D1DA0206020), SPH_C64(0xFE215DA321FE1FFE),
+	SPH_C64(0xDD8E7BA68EDD7ADD), SPH_C64(0xF5060EFB06F502F5),
+	SPH_C64(0xB45E7DC95EB4C1B4), SPH_C64(0x5F3E9DC23E5FE15F),
+	SPH_C64(0x0A225A50220A1E0A), SPH_C64(0xB55B74C15BB5C2B5),
+	SPH_C64(0xC0E78E4EE7C05DC0), SPH_C64(0xA01AC9691AA0FDA0),
+	SPH_C64(0x71A8DEAFA8719371), SPH_C64(0xA50BE4410BA5F2A5),
+	SPH_C64(0x2D995875992D772D), SPH_C64(0x60FD4727FD60A060),
+	SPH_C64(0x72A7C5B7A7729672), SPH_C64(0x93E57FECE593A893),
+	SPH_C64(0x39DDECD5DD394B39), SPH_C64(0x0828484028081808),
+	SPH_C64(0x83B5EF6CB5839883), SPH_C64(0x21A53415A5216321),
+	SPH_C64(0x5C3186DA315CE45C), SPH_C64(0x87A1CB4CA1879487),
+	SPH_C64(0xB14F50E14FB1CEB1), SPH_C64(0xE047B35347E03DE0),
+	SPH_C64(0x0000000000000000), SPH_C64(0xC3E89556E8C358C3),
+	SPH_C64(0x125A82905A123612), SPH_C64(0x91EF6DFCEF91AE91),
+	SPH_C64(0x8A98AE24988A838A), SPH_C64(0x020A12100A020602),
+	SPH_C64(0x1C6CFCE06C1C241C), SPH_C64(0xE659856359E637E6),
+	SPH_C64(0x454C57124C45CF45), SPH_C64(0xC2ED9C5EEDC25BC2),
+	SPH_C64(0xC4F3AA6EF3C451C4), SPH_C64(0xFD2E46BB2EFD1AFD),
+	SPH_C64(0xBF792E9179BFDCBF), SPH_C64(0x44495E1A4944CC44),
+	SPH_C64(0xA11FC0611FA1FEA1), SPH_C64(0x4C61165A614CD44C),
+	SPH_C64(0x33FFB685FF335533), SPH_C64(0xC5F6A366F6C552C5),
+	SPH_C64(0x84AED054AE849184), SPH_C64(0x23AF2605AF236523),
+	SPH_C64(0x7C91BBC7917C847C), SPH_C64(0xB04A59E94AB0CDB0),
+	SPH_C64(0x25B11035B1256F25), SPH_C64(0x1541BDA841153F15),
+	SPH_C64(0x35E180B5E1355F35), SPH_C64(0x69D0066FD069BB69),
+	SPH_C64(0xFF2454AB24FF1CFF), SPH_C64(0x94FE40D4FE94A194),
+	SPH_C64(0x4D641F52644DD74D), SPH_C64(0x70ADD7A7AD709070),
+	SPH_C64(0xA210DB7910A2FBA2), SPH_C64(0xAF29BE1129AFECAF),
+	SPH_C64(0xCDDEEB26DECD4ACD), SPH_C64(0xD6A928FEA9D667D6),
+	SPH_C64(0x6CC12B47C16CB46C), SPH_C64(0xB75166D151B7C4B7),
+	SPH_C64(0xF83F6B933FF815F8), SPH_C64(0x092D41482D091B09),
+	SPH_C64(0xF31838CB18F308F3), SPH_C64(0x67E6781FE667A967),
+	SPH_C64(0xA40EED490EA4F1A4), SPH_C64(0xEA65E90365EA23EA),
+	SPH_C64(0xEC7BDF337BEC29EC), SPH_C64(0xB6546FD954B6C7B6),
+	SPH_C64(0xD4A33AEEA3D461D4), SPH_C64(0xD2BD0CDEBDD26BD2),
+	SPH_C64(0x1444B4A044143C14), SPH_C64(0x1E66EEF0661E221E),
+	SPH_C64(0xE142BA5B42E13EE1), SPH_C64(0x24B4193DB4246C24),
+	SPH_C64(0x38D8E5DDD8384838), SPH_C64(0xC6F9B87EF9C657C6),
+	SPH_C64(0xDB904D9690DB70DB), SPH_C64(0x4B7A29627A4BDD4B),
+	SPH_C64(0x7A8F8DF78F7A8E7A), SPH_C64(0x3AD2F7CDD23A4E3A),
+	SPH_C64(0xDE8160BE81DE7FDE), SPH_C64(0x5E3B94CA3B5EE25E),
+	SPH_C64(0xDF8469B684DF7CDF), SPH_C64(0x95FB49DCFB95A295),
+	SPH_C64(0xFC2B4FB32BFC19FC), SPH_C64(0xAA38933938AAE3AA),
+	SPH_C64(0xD7AC21F6ACD764D7), SPH_C64(0xCED1F03ED1CE4FCE),
+	SPH_C64(0x071B3F381B070907), SPH_C64(0x0F337778330F110F),
+	SPH_C64(0x3DC9C8F5C93D473D), SPH_C64(0x5825A2FA2558E858),
+	SPH_C64(0x9AC83EA4C89AB39A), SPH_C64(0x98C22CB4C298B598),
+	SPH_C64(0x9CD60894D69CB99C), SPH_C64(0xF21D31C31DF20BF2),
+	SPH_C64(0xA701F65101A7F4A7), SPH_C64(0x1155998855113311),
+	SPH_C64(0x7E9BA9D79B7E827E), SPH_C64(0x8B9DA72C9D8B808B),
+	SPH_C64(0x435261225243C543), SPH_C64(0x030F1B180F030503),
+	SPH_C64(0xE24DA1434DE23BE2), SPH_C64(0xDC8B72AE8BDC79DC),
+	SPH_C64(0xE5569E7B56E532E5), SPH_C64(0xB2404BF940B2CBB2),
+	SPH_C64(0x4E6B044A6B4ED24E), SPH_C64(0xC7FCB176FCC754C7),
+	SPH_C64(0x6DC4224FC46DB76D), SPH_C64(0xE96AF21B6AE926E9),
+	SPH_C64(0x27BB0225BB276927), SPH_C64(0x405D7A3A5D40C040),
+	SPH_C64(0xD89F568E9FD875D8), SPH_C64(0x37EB92A5EB375937),
+	SPH_C64(0x92E076E4E092AB92), SPH_C64(0x8F89830C898F8C8F),
+	SPH_C64(0x0105090805010301), SPH_C64(0x1D69F5E8691D271D),
+	SPH_C64(0x5302F1A20253F553), SPH_C64(0x3EC6D3EDC63E423E),
+	SPH_C64(0x5920ABF22059EB59), SPH_C64(0xC1E28746E2C15EC1),
+	SPH_C64(0x4F6E0D426E4FD14F), SPH_C64(0x32FABF8DFA325632),
+	SPH_C64(0x164EA6B04E163A16), SPH_C64(0xFA35798335FA13FA),
+	SPH_C64(0x74B9F387B9749C74), SPH_C64(0xFB30708B30FB10FB),
+	SPH_C64(0x63F25C3FF263A563), SPH_C64(0x9FD9138CD99FBC9F),
+	SPH_C64(0x34E489BDE4345C34), SPH_C64(0x1A72CAD0721A2E1A),
+	SPH_C64(0x2A82674D822A7E2A), SPH_C64(0x5A2FB0EA2F5AEE5A),
+	SPH_C64(0x8D83911C838D8A8D), SPH_C64(0xC9CACF06CAC946C9),
+	SPH_C64(0xCFD4F936D4CF4CCF), SPH_C64(0xF60915E309F607F6),
+	SPH_C64(0x90EA64F4EA90AD90), SPH_C64(0x2888755D88287828),
+	SPH_C64(0x8892BC3492888588), SPH_C64(0x9BCD37ACCD9BB09B),
+	SPH_C64(0x31F5A495F5315331), SPH_C64(0x0E367E70360E120E),
+	SPH_C64(0xBD733C8173BDDABD), SPH_C64(0x4A7F206A7F4ADE4A),
+	SPH_C64(0xE86FFB136FE825E8), SPH_C64(0x96F452C4F496A796),
+	SPH_C64(0xA604FF5904A6F7A6), SPH_C64(0x0C3C6C603C0C140C),
+	SPH_C64(0xC8CFC60ECFC845C8), SPH_C64(0x798096EF80798B79),
+	SPH_C64(0xBC76358976BCD9BC), SPH_C64(0xBE7C27997CBEDFBE),
+	SPH_C64(0xEF74C42B74EF2CEF), SPH_C64(0x6ECB3957CB6EB26E),
+	SPH_C64(0x46434C0A4346CA46), SPH_C64(0x97F15BCCF197A497),
+	SPH_C64(0x5B2AB9E22A5BED5B), SPH_C64(0xED7ED63B7EED2AED),
+	SPH_C64(0x197DD1C87D192B19), SPH_C64(0xD99A5F869AD976D9),
+	SPH_C64(0xAC26A50926ACE9AC), SPH_C64(0x99C725BCC799B699),
+	SPH_C64(0xA832812932A8E5A8), SPH_C64(0x298D7C558D297B29),
+	SPH_C64(0x64E96307E964AC64), SPH_C64(0x1F63E7F8631F211F),
+	SPH_C64(0xAD23AC0123ADEAAD), SPH_C64(0x551CC7921C55FF55),
+	SPH_C64(0x135F8B985F133513), SPH_C64(0xBB6D0AB16DBBD0BB),
+	SPH_C64(0xF70C1CEB0CF704F7), SPH_C64(0x6FCE305FCE6FB16F),
+	SPH_C64(0xB96718A167B9D6B9), SPH_C64(0x474645024647C947),
+	SPH_C64(0x2F934A65932F712F), SPH_C64(0xEE71CD2371EE2FEE),
+	SPH_C64(0xB86211A962B8D5B8), SPH_C64(0x7B8A84FF8A7B8D7B),
+	SPH_C64(0x8997B53C97898689), SPH_C64(0x30F0AD9DF0305030),
+	SPH_C64(0xD3B805D6B8D368D3), SPH_C64(0x7F9EA0DF9E7F817F),
+	SPH_C64(0x76B3E197B3769A76), SPH_C64(0x82B0E664B0829B82)
+};
+
+#endif
+
+static const sph_u64 old0_RC[10] = {	/* entry r packs the top bytes of old0_T4[8r..8r+7], index 8r in the least significant byte; presumably per-round constants -- confirm against users */
+	SPH_C64(0xE46A9D482BEBD068),	/* top bytes of old0_T4[0..7] */
+	SPH_C64(0x9E85F17D8156A3E3),	/* top bytes of old0_T4[8..15] */
+	SPH_C64(0xD561A917CA788E2C),	/* top bytes of old0_T4[16..23] */
+	SPH_C64(0x422251773C8C0B5D),	/* top bytes of old0_T4[24..31] */
+	SPH_C64(0x18B386CC8041543F),	/* top bytes of old0_T4[32..39] */
+	SPH_C64(0x6BD136F46206572E),	/* top bytes of old0_T4[40..47] */
+	SPH_C64(0xF92649DA1075651B),	/* top bytes of old0_T4[48..55] */
+	SPH_C64(0xAB5250AEBAE766CB),	/* top bytes of old0_T4[56..63] */
+	SPH_C64(0xFE20043B730DF005),	/* top bytes of old0_T4[64..71] */
+	SPH_C64(0xA0C0B50A5FB4F5DD)	/* top bytes of old0_T4[72..79] */
+};
+
+/* ====================================================================== */
+/*
+ * Constants for plain WHIRLPOOL-1 (second version).
+ */
+
+static const sph_u64 old1_T0[256] = {	/* base 256-entry table of 64-bit words for WHIRLPOOL-1; always compiled (declared before the SPH_SMALL_FOOTPRINT_WHIRLPOOL guard). Presumably indexed by a byte value -- NOTE(review): confirm at the use site */
+	SPH_C64(0x78D8C07818281818), SPH_C64(0xAF2605AF23652323),
+	SPH_C64(0xF9B87EF9C657C6C6), SPH_C64(0x6FFB136FE825E8E8),
+	SPH_C64(0xA1CB4CA187948787), SPH_C64(0x6211A962B8D5B8B8),
+	SPH_C64(0x0509080501030101), SPH_C64(0x6E0D426E4FD14F4F),
+	SPH_C64(0xEE9BADEE365A3636), SPH_C64(0x04FF5904A6F7A6A6),
+	SPH_C64(0xBD0CDEBDD26BD2D2), SPH_C64(0x060EFB06F502F5F5),
+	SPH_C64(0x8096EF80798B7979), SPH_C64(0xCE305FCE6FB16F6F),
+	SPH_C64(0xEF6DFCEF91AE9191), SPH_C64(0x07F8AA0752F65252),
+	SPH_C64(0xFD4727FD60A06060), SPH_C64(0x76358976BCD9BCBC),
+	SPH_C64(0xCD37ACCD9BB09B9B), SPH_C64(0x8C8A048C8E8F8E8E),
+	SPH_C64(0x15D27115A3F8A3A3), SPH_C64(0x3C6C603C0C140C0C),
+	SPH_C64(0x8A84FF8A7B8D7B7B), SPH_C64(0xE180B5E1355F3535),
+	SPH_C64(0x69F5E8691D271D1D), SPH_C64(0x47B35347E03DE0E0),
+	SPH_C64(0xAC21F6ACD764D7D7), SPH_C64(0xED9C5EEDC25BC2C2),
+	SPH_C64(0x96436D962E722E2E), SPH_C64(0x7A29627A4BDD4B4B),
+	SPH_C64(0x215DA321FE1FFEFE), SPH_C64(0x16D5821657F95757),
+	SPH_C64(0x41BDA841153F1515), SPH_C64(0xB6E89FB677997777),
+	SPH_C64(0xEB92A5EB37593737), SPH_C64(0x569E7B56E532E5E5),
+	SPH_C64(0xD9138CD99FBC9F9F), SPH_C64(0x1723D317F00DF0F0),
+	SPH_C64(0x7F206A7F4ADE4A4A), SPH_C64(0x95449E95DA73DADA),
+	SPH_C64(0x25A2FA2558E85858), SPH_C64(0xCACF06CAC946C9C9),
+	SPH_C64(0x8D7C558D297B2929), SPH_C64(0x225A50220A1E0A0A),
+	SPH_C64(0x4F50E14FB1CEB1B1), SPH_C64(0x1AC9691AA0FDA0A0),
+	SPH_C64(0xDA147FDA6BBD6B6B), SPH_C64(0xABD95CAB85928585),
+	SPH_C64(0x733C8173BDDABDBD), SPH_C64(0x348FD2345DE75D5D),
+	SPH_C64(0x5090805010301010), SPH_C64(0x0307F303F401F4F4),
+	SPH_C64(0xC0DD16C0CB40CBCB), SPH_C64(0xC6D3EDC63E423E3E),
+	SPH_C64(0x112D2811050F0505), SPH_C64(0xE6781FE667A96767),
+	SPH_C64(0x53977353E431E4E4), SPH_C64(0xBB0225BB27692727),
+	SPH_C64(0x5873325841C34141), SPH_C64(0x9DA72C9D8B808B8B),
+	SPH_C64(0x01F65101A7F4A7A7), SPH_C64(0x94B2CF947D877D7D),
+	SPH_C64(0xFB49DCFB95A29595), SPH_C64(0x9F568E9FD875D8D8),
+	SPH_C64(0x30708B30FB10FBFB), SPH_C64(0x71CD2371EE2FEEEE),
+	SPH_C64(0x91BBC7917C847C7C), SPH_C64(0xE37117E366AA6666),
+	SPH_C64(0x8E7BA68EDD7ADDDD), SPH_C64(0x4BAFB84B17391717),
+	SPH_C64(0x4645024647C94747), SPH_C64(0xDC1A84DC9EBF9E9E),
+	SPH_C64(0xC5D41EC5CA43CACA), SPH_C64(0x995875992D772D2D),
+	SPH_C64(0x792E9179BFDCBFBF), SPH_C64(0x1B3F381B07090707),
+	SPH_C64(0x23AC0123ADEAADAD), SPH_C64(0x2FB0EA2F5AEE5A5A),
+	SPH_C64(0xB5EF6CB583988383), SPH_C64(0xFFB685FF33553333),
+	SPH_C64(0xF25C3FF263A56363), SPH_C64(0x0A12100A02060202),
+	SPH_C64(0x38933938AAE3AAAA), SPH_C64(0xA8DEAFA871937171),
+	SPH_C64(0xCFC60ECFC845C8C8), SPH_C64(0x7DD1C87D192B1919),
+	SPH_C64(0x703B727049DB4949), SPH_C64(0x9A5F869AD976D9D9),
+	SPH_C64(0x1D31C31DF20BF2F2), SPH_C64(0x48A84B48E338E3E3),
+	SPH_C64(0x2AB9E22A5BED5B5B), SPH_C64(0x92BC349288858888),
+	SPH_C64(0xC83EA4C89AB39A9A), SPH_C64(0xBE0B2DBE266A2626),
+	SPH_C64(0xFABF8DFA32563232), SPH_C64(0x4A59E94AB0CDB0B0),
+	SPH_C64(0x6AF21B6AE926E9E9), SPH_C64(0x337778330F110F0F),
+	SPH_C64(0xA633E6A6D562D5D5), SPH_C64(0xBAF474BA809D8080),
+	SPH_C64(0x7C27997CBEDFBEBE), SPH_C64(0xDEEB26DECD4ACDCD),
+	SPH_C64(0xE489BDE4345C3434), SPH_C64(0x75327A7548D84848),
+	SPH_C64(0x2454AB24FF1CFFFF), SPH_C64(0x8F8DF78F7A8E7A7A),
+	SPH_C64(0xEA64F4EA90AD9090), SPH_C64(0x3E9DC23E5FE15F5F),
+	SPH_C64(0xA03D1DA020602020), SPH_C64(0xD50F67D568B86868),
+	SPH_C64(0x72CAD0721A2E1A1A), SPH_C64(0x2CB7192CAEEFAEAE),
+	SPH_C64(0x5E7DC95EB4C1B4B4), SPH_C64(0x19CE9A1954FC5454),
+	SPH_C64(0xE57FECE593A89393), SPH_C64(0xAA2F0DAA22662222),
+	SPH_C64(0xE96307E964AC6464), SPH_C64(0x122ADB12F10EF1F1),
+	SPH_C64(0xA2CCBFA273957373), SPH_C64(0x5A82905A12361212),
+	SPH_C64(0x5D7A3A5D40C04040), SPH_C64(0x2848402808180808),
+	SPH_C64(0xE89556E8C358C3C3), SPH_C64(0x7BDF337BEC29ECEC),
+	SPH_C64(0x904D9690DB70DBDB), SPH_C64(0x1FC0611FA1FEA1A1),
+	SPH_C64(0x83911C838D8A8D8D), SPH_C64(0xC9C8F5C93D473D3D),
+	SPH_C64(0xF15BCCF197A49797), SPH_C64(0x0000000000000000),
+	SPH_C64(0xD4F936D4CF4CCFCF), SPH_C64(0x876E45872B7D2B2B),
+	SPH_C64(0xB3E197B3769A7676), SPH_C64(0xB0E664B0829B8282),
+	SPH_C64(0xA928FEA9D667D6D6), SPH_C64(0x77C3D8771B2D1B1B),
+	SPH_C64(0x5B74C15BB5C2B5B5), SPH_C64(0x29BE1129AFECAFAF),
+	SPH_C64(0xDF1D77DF6ABE6A6A), SPH_C64(0x0DEABA0D50F05050),
+	SPH_C64(0x4C57124C45CF4545), SPH_C64(0x1838CB18F308F3F3),
+	SPH_C64(0xF0AD9DF030503030), SPH_C64(0x74C42B74EF2CEFEF),
+	SPH_C64(0xC3DAE5C33F413F3F), SPH_C64(0x1CC7921C55FF5555),
+	SPH_C64(0x10DB7910A2FBA2A2), SPH_C64(0x65E90365EA23EAEA),
+	SPH_C64(0xEC6A0FEC65AF6565), SPH_C64(0x6803B968BAD3BABA),
+	SPH_C64(0x934A65932F712F2F), SPH_C64(0xE78E4EE7C05DC0C0),
+	SPH_C64(0x8160BE81DE7FDEDE), SPH_C64(0x6CFCE06C1C241C1C),
+	SPH_C64(0x2E46BB2EFD1AFDFD), SPH_C64(0x641F52644DD74D4D),
+	SPH_C64(0xE076E4E092AB9292), SPH_C64(0xBCFA8FBC759F7575),
+	SPH_C64(0x1E36301E060A0606), SPH_C64(0x98AE24988A838A8A),
+	SPH_C64(0x404BF940B2CBB2B2), SPH_C64(0x59856359E637E6E6),
+	SPH_C64(0x367E70360E120E0E), SPH_C64(0x63E7F8631F211F1F),
+	SPH_C64(0xF75537F762A66262), SPH_C64(0xA33AEEA3D461D4D4),
+	SPH_C64(0x32812932A8E5A8A8), SPH_C64(0xF452C4F496A79696),
+	SPH_C64(0x3A629B3AF916F9F9), SPH_C64(0xF6A366F6C552C5C5),
+	SPH_C64(0xB11035B1256F2525), SPH_C64(0x20ABF22059EB5959),
+	SPH_C64(0xAED054AE84918484), SPH_C64(0xA7C5B7A772967272),
+	SPH_C64(0xDDECD5DD394B3939), SPH_C64(0x61165A614CD44C4C),
+	SPH_C64(0x3B94CA3B5EE25E5E), SPH_C64(0x859FE78578887878),
+	SPH_C64(0xD8E5DDD838483838), SPH_C64(0x869814868C898C8C),
+	SPH_C64(0xB217C6B2D16ED1D1), SPH_C64(0x0BE4410BA5F2A5A5),
+	SPH_C64(0x4DA1434DE23BE2E2), SPH_C64(0xF84E2FF861A36161),
+	SPH_C64(0x4542F145B3C8B3B3), SPH_C64(0xA53415A521632121),
+	SPH_C64(0xD60894D69CB99C9C), SPH_C64(0x66EEF0661E221E1E),
+	SPH_C64(0x5261225243C54343), SPH_C64(0xFCB176FCC754C7C7),
+	SPH_C64(0x2B4FB32BFC19FCFC), SPH_C64(0x14242014040C0404),
+	SPH_C64(0x08E3B20851F35151), SPH_C64(0xC725BCC799B69999),
+	SPH_C64(0xC4224FC46DB76D6D), SPH_C64(0x396568390D170D0D),
+	SPH_C64(0x35798335FA13FAFA), SPH_C64(0x8469B684DF7CDFDF),
+	SPH_C64(0x9BA9D79B7E827E7E), SPH_C64(0xB4193DB4246C2424),
+	SPH_C64(0xD7FEC5D73B4D3B3B), SPH_C64(0x3D9A313DABE0ABAB),
+	SPH_C64(0xD1F03ED1CE4FCECE), SPH_C64(0x5599885511331111),
+	SPH_C64(0x89830C898F8C8F8F), SPH_C64(0x6B044A6B4ED24E4E),
+	SPH_C64(0x5166D151B7C4B7B7), SPH_C64(0x60E00B60EB20EBEB),
+	SPH_C64(0xCCC1FDCC3C443C3C), SPH_C64(0xBFFD7CBF819E8181),
+	SPH_C64(0xFE40D4FE94A19494), SPH_C64(0x0C1CEB0CF704F7F7),
+	SPH_C64(0x6718A167B9D6B9B9), SPH_C64(0x5F8B985F13351313),
+	SPH_C64(0x9C517D9C2C742C2C), SPH_C64(0xB805D6B8D368D3D3),
+	SPH_C64(0x5C8C6B5CE734E7E7), SPH_C64(0xCB3957CB6EB26E6E),
+	SPH_C64(0xF3AA6EF3C451C4C4), SPH_C64(0x0F1B180F03050303),
+	SPH_C64(0x13DC8A1356FA5656), SPH_C64(0x495E1A4944CC4444),
+	SPH_C64(0x9EA0DF9E7F817F7F), SPH_C64(0x37882137A9E6A9A9),
+	SPH_C64(0x82674D822A7E2A2A), SPH_C64(0x6D0AB16DBBD0BBBB),
+	SPH_C64(0xE28746E2C15EC1C1), SPH_C64(0x02F1A20253F55353),
+	SPH_C64(0x8B72AE8BDC79DCDC), SPH_C64(0x275358270B1D0B0B),
+	SPH_C64(0xD3019CD39DBA9D9D), SPH_C64(0xC12B47C16CB46C6C),
+	SPH_C64(0xF5A495F531533131), SPH_C64(0xB9F387B9749C7474),
+	SPH_C64(0x0915E309F607F6F6), SPH_C64(0x434C0A4346CA4646),
+	SPH_C64(0x26A50926ACE9ACAC), SPH_C64(0x97B53C9789868989),
+	SPH_C64(0x44B4A044143C1414), SPH_C64(0x42BA5B42E13EE1E1),
+	SPH_C64(0x4EA6B04E163A1616), SPH_C64(0xD2F7CDD23A4E3A3A),
+	SPH_C64(0xD0066FD069BB6969), SPH_C64(0x2D41482D091B0909),
+	SPH_C64(0xADD7A7AD70907070), SPH_C64(0x546FD954B6C7B6B6),
+	SPH_C64(0xB71ECEB7D06DD0D0), SPH_C64(0x7ED63B7EED2AEDED),
+	SPH_C64(0xDBE22EDBCC49CCCC), SPH_C64(0x57682A5742C64242),
+	SPH_C64(0xC22CB4C298B59898), SPH_C64(0x0EED490EA4F1A4A4),
+	SPH_C64(0x88755D8828782828), SPH_C64(0x3186DA315CE45C5C),
+	SPH_C64(0x3F6B933FF815F8F8), SPH_C64(0xA4C244A486978686)
+};
+
+#if !SPH_SMALL_FOOTPRINT_WHIRLPOOL
+
+static const sph_u64 old1_T1[256] = {	/* 256 entries; spot-checked entries equal the matching old1_T0 entry rotated left by 8 bits (0x78D8C07818281818 -> 0xD8C0781828181878). Compiled only when SPH_SMALL_FOOTPRINT_WHIRLPOOL is false. */
+	SPH_C64(0xD8C0781828181878), SPH_C64(0x2605AF23652323AF),
+	SPH_C64(0xB87EF9C657C6C6F9), SPH_C64(0xFB136FE825E8E86F),
+	SPH_C64(0xCB4CA187948787A1), SPH_C64(0x11A962B8D5B8B862),
+	SPH_C64(0x0908050103010105), SPH_C64(0x0D426E4FD14F4F6E),
+	SPH_C64(0x9BADEE365A3636EE), SPH_C64(0xFF5904A6F7A6A604),
+	SPH_C64(0x0CDEBDD26BD2D2BD), SPH_C64(0x0EFB06F502F5F506),
+	SPH_C64(0x96EF80798B797980), SPH_C64(0x305FCE6FB16F6FCE),
+	SPH_C64(0x6DFCEF91AE9191EF), SPH_C64(0xF8AA0752F6525207),
+	SPH_C64(0x4727FD60A06060FD), SPH_C64(0x358976BCD9BCBC76),
+	SPH_C64(0x37ACCD9BB09B9BCD), SPH_C64(0x8A048C8E8F8E8E8C),
+	SPH_C64(0xD27115A3F8A3A315), SPH_C64(0x6C603C0C140C0C3C),
+	SPH_C64(0x84FF8A7B8D7B7B8A), SPH_C64(0x80B5E1355F3535E1),
+	SPH_C64(0xF5E8691D271D1D69), SPH_C64(0xB35347E03DE0E047),
+	SPH_C64(0x21F6ACD764D7D7AC), SPH_C64(0x9C5EEDC25BC2C2ED),
+	SPH_C64(0x436D962E722E2E96), SPH_C64(0x29627A4BDD4B4B7A),
+	SPH_C64(0x5DA321FE1FFEFE21), SPH_C64(0xD5821657F9575716),
+	SPH_C64(0xBDA841153F151541), SPH_C64(0xE89FB677997777B6),
+	SPH_C64(0x92A5EB37593737EB), SPH_C64(0x9E7B56E532E5E556),
+	SPH_C64(0x138CD99FBC9F9FD9), SPH_C64(0x23D317F00DF0F017),
+	SPH_C64(0x206A7F4ADE4A4A7F), SPH_C64(0x449E95DA73DADA95),
+	SPH_C64(0xA2FA2558E8585825), SPH_C64(0xCF06CAC946C9C9CA),
+	SPH_C64(0x7C558D297B29298D), SPH_C64(0x5A50220A1E0A0A22),
+	SPH_C64(0x50E14FB1CEB1B14F), SPH_C64(0xC9691AA0FDA0A01A),
+	SPH_C64(0x147FDA6BBD6B6BDA), SPH_C64(0xD95CAB85928585AB),
+	SPH_C64(0x3C8173BDDABDBD73), SPH_C64(0x8FD2345DE75D5D34),
+	SPH_C64(0x9080501030101050), SPH_C64(0x07F303F401F4F403),
+	SPH_C64(0xDD16C0CB40CBCBC0), SPH_C64(0xD3EDC63E423E3EC6),
+	SPH_C64(0x2D2811050F050511), SPH_C64(0x781FE667A96767E6),
+	SPH_C64(0x977353E431E4E453), SPH_C64(0x0225BB27692727BB),
+	SPH_C64(0x73325841C3414158), SPH_C64(0xA72C9D8B808B8B9D),
+	SPH_C64(0xF65101A7F4A7A701), SPH_C64(0xB2CF947D877D7D94),
+	SPH_C64(0x49DCFB95A29595FB), SPH_C64(0x568E9FD875D8D89F),
+	SPH_C64(0x708B30FB10FBFB30), SPH_C64(0xCD2371EE2FEEEE71),
+	SPH_C64(0xBBC7917C847C7C91), SPH_C64(0x7117E366AA6666E3),
+	SPH_C64(0x7BA68EDD7ADDDD8E), SPH_C64(0xAFB84B173917174B),
+	SPH_C64(0x45024647C9474746), SPH_C64(0x1A84DC9EBF9E9EDC),
+	SPH_C64(0xD41EC5CA43CACAC5), SPH_C64(0x5875992D772D2D99),
+	SPH_C64(0x2E9179BFDCBFBF79), SPH_C64(0x3F381B070907071B),
+	SPH_C64(0xAC0123ADEAADAD23), SPH_C64(0xB0EA2F5AEE5A5A2F),
+	SPH_C64(0xEF6CB583988383B5), SPH_C64(0xB685FF33553333FF),
+	SPH_C64(0x5C3FF263A56363F2), SPH_C64(0x12100A020602020A),
+	SPH_C64(0x933938AAE3AAAA38), SPH_C64(0xDEAFA871937171A8),
+	SPH_C64(0xC60ECFC845C8C8CF), SPH_C64(0xD1C87D192B19197D),
+	SPH_C64(0x3B727049DB494970), SPH_C64(0x5F869AD976D9D99A),
+	SPH_C64(0x31C31DF20BF2F21D), SPH_C64(0xA84B48E338E3E348),
+	SPH_C64(0xB9E22A5BED5B5B2A), SPH_C64(0xBC34928885888892),
+	SPH_C64(0x3EA4C89AB39A9AC8), SPH_C64(0x0B2DBE266A2626BE),
+	SPH_C64(0xBF8DFA32563232FA), SPH_C64(0x59E94AB0CDB0B04A),
+	SPH_C64(0xF21B6AE926E9E96A), SPH_C64(0x7778330F110F0F33),
+	SPH_C64(0x33E6A6D562D5D5A6), SPH_C64(0xF474BA809D8080BA),
+	SPH_C64(0x27997CBEDFBEBE7C), SPH_C64(0xEB26DECD4ACDCDDE),
+	SPH_C64(0x89BDE4345C3434E4), SPH_C64(0x327A7548D8484875),
+	SPH_C64(0x54AB24FF1CFFFF24), SPH_C64(0x8DF78F7A8E7A7A8F),
+	SPH_C64(0x64F4EA90AD9090EA), SPH_C64(0x9DC23E5FE15F5F3E),
+	SPH_C64(0x3D1DA020602020A0), SPH_C64(0x0F67D568B86868D5),
+	SPH_C64(0xCAD0721A2E1A1A72), SPH_C64(0xB7192CAEEFAEAE2C),
+	SPH_C64(0x7DC95EB4C1B4B45E), SPH_C64(0xCE9A1954FC545419),
+	SPH_C64(0x7FECE593A89393E5), SPH_C64(0x2F0DAA22662222AA),
+	SPH_C64(0x6307E964AC6464E9), SPH_C64(0x2ADB12F10EF1F112),
+	SPH_C64(0xCCBFA273957373A2), SPH_C64(0x82905A123612125A),
+	SPH_C64(0x7A3A5D40C040405D), SPH_C64(0x4840280818080828),
+	SPH_C64(0x9556E8C358C3C3E8), SPH_C64(0xDF337BEC29ECEC7B),
+	SPH_C64(0x4D9690DB70DBDB90), SPH_C64(0xC0611FA1FEA1A11F),
+	SPH_C64(0x911C838D8A8D8D83), SPH_C64(0xC8F5C93D473D3DC9),
+	SPH_C64(0x5BCCF197A49797F1), SPH_C64(0x0000000000000000),
+	SPH_C64(0xF936D4CF4CCFCFD4), SPH_C64(0x6E45872B7D2B2B87),
+	SPH_C64(0xE197B3769A7676B3), SPH_C64(0xE664B0829B8282B0),
+	SPH_C64(0x28FEA9D667D6D6A9), SPH_C64(0xC3D8771B2D1B1B77),
+	SPH_C64(0x74C15BB5C2B5B55B), SPH_C64(0xBE1129AFECAFAF29),
+	SPH_C64(0x1D77DF6ABE6A6ADF), SPH_C64(0xEABA0D50F050500D),
+	SPH_C64(0x57124C45CF45454C), SPH_C64(0x38CB18F308F3F318),
+	SPH_C64(0xAD9DF030503030F0), SPH_C64(0xC42B74EF2CEFEF74),
+	SPH_C64(0xDAE5C33F413F3FC3), SPH_C64(0xC7921C55FF55551C),
+	SPH_C64(0xDB7910A2FBA2A210), SPH_C64(0xE90365EA23EAEA65),
+	SPH_C64(0x6A0FEC65AF6565EC), SPH_C64(0x03B968BAD3BABA68),
+	SPH_C64(0x4A65932F712F2F93), SPH_C64(0x8E4EE7C05DC0C0E7),
+	SPH_C64(0x60BE81DE7FDEDE81), SPH_C64(0xFCE06C1C241C1C6C),
+	SPH_C64(0x46BB2EFD1AFDFD2E), SPH_C64(0x1F52644DD74D4D64),
+	SPH_C64(0x76E4E092AB9292E0), SPH_C64(0xFA8FBC759F7575BC),
+	SPH_C64(0x36301E060A06061E), SPH_C64(0xAE24988A838A8A98),
+	SPH_C64(0x4BF940B2CBB2B240), SPH_C64(0x856359E637E6E659),
+	SPH_C64(0x7E70360E120E0E36), SPH_C64(0xE7F8631F211F1F63),
+	SPH_C64(0x5537F762A66262F7), SPH_C64(0x3AEEA3D461D4D4A3),
+	SPH_C64(0x812932A8E5A8A832), SPH_C64(0x52C4F496A79696F4),
+	SPH_C64(0x629B3AF916F9F93A), SPH_C64(0xA366F6C552C5C5F6),
+	SPH_C64(0x1035B1256F2525B1), SPH_C64(0xABF22059EB595920),
+	SPH_C64(0xD054AE84918484AE), SPH_C64(0xC5B7A772967272A7),
+	SPH_C64(0xECD5DD394B3939DD), SPH_C64(0x165A614CD44C4C61),
+	SPH_C64(0x94CA3B5EE25E5E3B), SPH_C64(0x9FE7857888787885),
+	SPH_C64(0xE5DDD838483838D8), SPH_C64(0x9814868C898C8C86),
+	SPH_C64(0x17C6B2D16ED1D1B2), SPH_C64(0xE4410BA5F2A5A50B),
+	SPH_C64(0xA1434DE23BE2E24D), SPH_C64(0x4E2FF861A36161F8),
+	SPH_C64(0x42F145B3C8B3B345), SPH_C64(0x3415A521632121A5),
+	SPH_C64(0x0894D69CB99C9CD6), SPH_C64(0xEEF0661E221E1E66),
+	SPH_C64(0x61225243C5434352), SPH_C64(0xB176FCC754C7C7FC),
+	SPH_C64(0x4FB32BFC19FCFC2B), SPH_C64(0x242014040C040414),
+	SPH_C64(0xE3B20851F3515108), SPH_C64(0x25BCC799B69999C7),
+	SPH_C64(0x224FC46DB76D6DC4), SPH_C64(0x6568390D170D0D39),
+	SPH_C64(0x798335FA13FAFA35), SPH_C64(0x69B684DF7CDFDF84),
+	SPH_C64(0xA9D79B7E827E7E9B), SPH_C64(0x193DB4246C2424B4),
+	SPH_C64(0xFEC5D73B4D3B3BD7), SPH_C64(0x9A313DABE0ABAB3D),
+	SPH_C64(0xF03ED1CE4FCECED1), SPH_C64(0x9988551133111155),
+	SPH_C64(0x830C898F8C8F8F89), SPH_C64(0x044A6B4ED24E4E6B),
+	SPH_C64(0x66D151B7C4B7B751), SPH_C64(0xE00B60EB20EBEB60),
+	SPH_C64(0xC1FDCC3C443C3CCC), SPH_C64(0xFD7CBF819E8181BF),
+	SPH_C64(0x40D4FE94A19494FE), SPH_C64(0x1CEB0CF704F7F70C),
+	SPH_C64(0x18A167B9D6B9B967), SPH_C64(0x8B985F133513135F),
+	SPH_C64(0x517D9C2C742C2C9C), SPH_C64(0x05D6B8D368D3D3B8),
+	SPH_C64(0x8C6B5CE734E7E75C), SPH_C64(0x3957CB6EB26E6ECB),
+	SPH_C64(0xAA6EF3C451C4C4F3), SPH_C64(0x1B180F030503030F),
+	SPH_C64(0xDC8A1356FA565613), SPH_C64(0x5E1A4944CC444449),
+	SPH_C64(0xA0DF9E7F817F7F9E), SPH_C64(0x882137A9E6A9A937),
+	SPH_C64(0x674D822A7E2A2A82), SPH_C64(0x0AB16DBBD0BBBB6D),
+	SPH_C64(0x8746E2C15EC1C1E2), SPH_C64(0xF1A20253F5535302),
+	SPH_C64(0x72AE8BDC79DCDC8B), SPH_C64(0x5358270B1D0B0B27),
+	SPH_C64(0x019CD39DBA9D9DD3), SPH_C64(0x2B47C16CB46C6CC1),
+	SPH_C64(0xA495F531533131F5), SPH_C64(0xF387B9749C7474B9),
+	SPH_C64(0x15E309F607F6F609), SPH_C64(0x4C0A4346CA464643),
+	SPH_C64(0xA50926ACE9ACAC26), SPH_C64(0xB53C978986898997),
+	SPH_C64(0xB4A044143C141444), SPH_C64(0xBA5B42E13EE1E142),
+	SPH_C64(0xA6B04E163A16164E), SPH_C64(0xF7CDD23A4E3A3AD2),
+	SPH_C64(0x066FD069BB6969D0), SPH_C64(0x41482D091B09092D),
+	SPH_C64(0xD7A7AD70907070AD), SPH_C64(0x6FD954B6C7B6B654),
+	SPH_C64(0x1ECEB7D06DD0D0B7), SPH_C64(0xD63B7EED2AEDED7E),
+	SPH_C64(0xE22EDBCC49CCCCDB), SPH_C64(0x682A5742C6424257),
+	SPH_C64(0x2CB4C298B59898C2), SPH_C64(0xED490EA4F1A4A40E),
+	SPH_C64(0x755D882878282888), SPH_C64(0x86DA315CE45C5C31),
+	SPH_C64(0x6B933FF815F8F83F), SPH_C64(0xC244A486978686A4)
+};
+
+static const sph_u64 old1_T2[256] = {	/* 256 entries; spot-checked entries equal the matching old1_T0 entry rotated left by 16 bits (0x78D8C07818281818 -> 0xC0781828181878D8). Compiled only when SPH_SMALL_FOOTPRINT_WHIRLPOOL is false. */
+	SPH_C64(0xC0781828181878D8), SPH_C64(0x05AF23652323AF26),
+	SPH_C64(0x7EF9C657C6C6F9B8), SPH_C64(0x136FE825E8E86FFB),
+	SPH_C64(0x4CA187948787A1CB), SPH_C64(0xA962B8D5B8B86211),
+	SPH_C64(0x0805010301010509), SPH_C64(0x426E4FD14F4F6E0D),
+	SPH_C64(0xADEE365A3636EE9B), SPH_C64(0x5904A6F7A6A604FF),
+	SPH_C64(0xDEBDD26BD2D2BD0C), SPH_C64(0xFB06F502F5F5060E),
+	SPH_C64(0xEF80798B79798096), SPH_C64(0x5FCE6FB16F6FCE30),
+	SPH_C64(0xFCEF91AE9191EF6D), SPH_C64(0xAA0752F6525207F8),
+	SPH_C64(0x27FD60A06060FD47), SPH_C64(0x8976BCD9BCBC7635),
+	SPH_C64(0xACCD9BB09B9BCD37), SPH_C64(0x048C8E8F8E8E8C8A),
+	SPH_C64(0x7115A3F8A3A315D2), SPH_C64(0x603C0C140C0C3C6C),
+	SPH_C64(0xFF8A7B8D7B7B8A84), SPH_C64(0xB5E1355F3535E180),
+	SPH_C64(0xE8691D271D1D69F5), SPH_C64(0x5347E03DE0E047B3),
+	SPH_C64(0xF6ACD764D7D7AC21), SPH_C64(0x5EEDC25BC2C2ED9C),
+	SPH_C64(0x6D962E722E2E9643), SPH_C64(0x627A4BDD4B4B7A29),
+	SPH_C64(0xA321FE1FFEFE215D), SPH_C64(0x821657F9575716D5),
+	SPH_C64(0xA841153F151541BD), SPH_C64(0x9FB677997777B6E8),
+	SPH_C64(0xA5EB37593737EB92), SPH_C64(0x7B56E532E5E5569E),
+	SPH_C64(0x8CD99FBC9F9FD913), SPH_C64(0xD317F00DF0F01723),
+	SPH_C64(0x6A7F4ADE4A4A7F20), SPH_C64(0x9E95DA73DADA9544),
+	SPH_C64(0xFA2558E8585825A2), SPH_C64(0x06CAC946C9C9CACF),
+	SPH_C64(0x558D297B29298D7C), SPH_C64(0x50220A1E0A0A225A),
+	SPH_C64(0xE14FB1CEB1B14F50), SPH_C64(0x691AA0FDA0A01AC9),
+	SPH_C64(0x7FDA6BBD6B6BDA14), SPH_C64(0x5CAB85928585ABD9),
+	SPH_C64(0x8173BDDABDBD733C), SPH_C64(0xD2345DE75D5D348F),
+	SPH_C64(0x8050103010105090), SPH_C64(0xF303F401F4F40307),
+	SPH_C64(0x16C0CB40CBCBC0DD), SPH_C64(0xEDC63E423E3EC6D3),
+	SPH_C64(0x2811050F0505112D), SPH_C64(0x1FE667A96767E678),
+	SPH_C64(0x7353E431E4E45397), SPH_C64(0x25BB27692727BB02),
+	SPH_C64(0x325841C341415873), SPH_C64(0x2C9D8B808B8B9DA7),
+	SPH_C64(0x5101A7F4A7A701F6), SPH_C64(0xCF947D877D7D94B2),
+	SPH_C64(0xDCFB95A29595FB49), SPH_C64(0x8E9FD875D8D89F56),
+	SPH_C64(0x8B30FB10FBFB3070), SPH_C64(0x2371EE2FEEEE71CD),
+	SPH_C64(0xC7917C847C7C91BB), SPH_C64(0x17E366AA6666E371),
+	SPH_C64(0xA68EDD7ADDDD8E7B), SPH_C64(0xB84B173917174BAF),
+	SPH_C64(0x024647C947474645), SPH_C64(0x84DC9EBF9E9EDC1A),
+	SPH_C64(0x1EC5CA43CACAC5D4), SPH_C64(0x75992D772D2D9958),
+	SPH_C64(0x9179BFDCBFBF792E), SPH_C64(0x381B070907071B3F),
+	SPH_C64(0x0123ADEAADAD23AC), SPH_C64(0xEA2F5AEE5A5A2FB0),
+	SPH_C64(0x6CB583988383B5EF), SPH_C64(0x85FF33553333FFB6),
+	SPH_C64(0x3FF263A56363F25C), SPH_C64(0x100A020602020A12),
+	SPH_C64(0x3938AAE3AAAA3893), SPH_C64(0xAFA871937171A8DE),
+	SPH_C64(0x0ECFC845C8C8CFC6), SPH_C64(0xC87D192B19197DD1),
+	SPH_C64(0x727049DB4949703B), SPH_C64(0x869AD976D9D99A5F),
+	SPH_C64(0xC31DF20BF2F21D31), SPH_C64(0x4B48E338E3E348A8),
+	SPH_C64(0xE22A5BED5B5B2AB9), SPH_C64(0x34928885888892BC),
+	SPH_C64(0xA4C89AB39A9AC83E), SPH_C64(0x2DBE266A2626BE0B),
+	SPH_C64(0x8DFA32563232FABF), SPH_C64(0xE94AB0CDB0B04A59),
+	SPH_C64(0x1B6AE926E9E96AF2), SPH_C64(0x78330F110F0F3377),
+	SPH_C64(0xE6A6D562D5D5A633), SPH_C64(0x74BA809D8080BAF4),
+	SPH_C64(0x997CBEDFBEBE7C27), SPH_C64(0x26DECD4ACDCDDEEB),
+	SPH_C64(0xBDE4345C3434E489), SPH_C64(0x7A7548D848487532),
+	SPH_C64(0xAB24FF1CFFFF2454), SPH_C64(0xF78F7A8E7A7A8F8D),
+	SPH_C64(0xF4EA90AD9090EA64), SPH_C64(0xC23E5FE15F5F3E9D),
+	SPH_C64(0x1DA020602020A03D), SPH_C64(0x67D568B86868D50F),
+	SPH_C64(0xD0721A2E1A1A72CA), SPH_C64(0x192CAEEFAEAE2CB7),
+	SPH_C64(0xC95EB4C1B4B45E7D), SPH_C64(0x9A1954FC545419CE),
+	SPH_C64(0xECE593A89393E57F), SPH_C64(0x0DAA22662222AA2F),
+	SPH_C64(0x07E964AC6464E963), SPH_C64(0xDB12F10EF1F1122A),
+	SPH_C64(0xBFA273957373A2CC), SPH_C64(0x905A123612125A82),
+	SPH_C64(0x3A5D40C040405D7A), SPH_C64(0x4028081808082848),
+	SPH_C64(0x56E8C358C3C3E895), SPH_C64(0x337BEC29ECEC7BDF),
+	SPH_C64(0x9690DB70DBDB904D), SPH_C64(0x611FA1FEA1A11FC0),
+	SPH_C64(0x1C838D8A8D8D8391), SPH_C64(0xF5C93D473D3DC9C8),
+	SPH_C64(0xCCF197A49797F15B), SPH_C64(0x0000000000000000),
+	SPH_C64(0x36D4CF4CCFCFD4F9), SPH_C64(0x45872B7D2B2B876E),
+	SPH_C64(0x97B3769A7676B3E1), SPH_C64(0x64B0829B8282B0E6),
+	SPH_C64(0xFEA9D667D6D6A928), SPH_C64(0xD8771B2D1B1B77C3),
+	SPH_C64(0xC15BB5C2B5B55B74), SPH_C64(0x1129AFECAFAF29BE),
+	SPH_C64(0x77DF6ABE6A6ADF1D), SPH_C64(0xBA0D50F050500DEA),
+	SPH_C64(0x124C45CF45454C57), SPH_C64(0xCB18F308F3F31838),
+	SPH_C64(0x9DF030503030F0AD), SPH_C64(0x2B74EF2CEFEF74C4),
+	SPH_C64(0xE5C33F413F3FC3DA), SPH_C64(0x921C55FF55551CC7),
+	SPH_C64(0x7910A2FBA2A210DB), SPH_C64(0x0365EA23EAEA65E9),
+	SPH_C64(0x0FEC65AF6565EC6A), SPH_C64(0xB968BAD3BABA6803),
+	SPH_C64(0x65932F712F2F934A), SPH_C64(0x4EE7C05DC0C0E78E),
+	SPH_C64(0xBE81DE7FDEDE8160), SPH_C64(0xE06C1C241C1C6CFC),
+	SPH_C64(0xBB2EFD1AFDFD2E46), SPH_C64(0x52644DD74D4D641F),
+	SPH_C64(0xE4E092AB9292E076), SPH_C64(0x8FBC759F7575BCFA),
+	SPH_C64(0x301E060A06061E36), SPH_C64(0x24988A838A8A98AE),
+	SPH_C64(0xF940B2CBB2B2404B), SPH_C64(0x6359E637E6E65985),
+	SPH_C64(0x70360E120E0E367E), SPH_C64(0xF8631F211F1F63E7),
+	SPH_C64(0x37F762A66262F755), SPH_C64(0xEEA3D461D4D4A33A),
+	SPH_C64(0x2932A8E5A8A83281), SPH_C64(0xC4F496A79696F452),
+	SPH_C64(0x9B3AF916F9F93A62), SPH_C64(0x66F6C552C5C5F6A3),
+	SPH_C64(0x35B1256F2525B110), SPH_C64(0xF22059EB595920AB),
+	SPH_C64(0x54AE84918484AED0), SPH_C64(0xB7A772967272A7C5),
+	SPH_C64(0xD5DD394B3939DDEC), SPH_C64(0x5A614CD44C4C6116),
+	SPH_C64(0xCA3B5EE25E5E3B94), SPH_C64(0xE78578887878859F),
+	SPH_C64(0xDDD838483838D8E5), SPH_C64(0x14868C898C8C8698),
+	SPH_C64(0xC6B2D16ED1D1B217), SPH_C64(0x410BA5F2A5A50BE4),
+	SPH_C64(0x434DE23BE2E24DA1), SPH_C64(0x2FF861A36161F84E),
+	SPH_C64(0xF145B3C8B3B34542), SPH_C64(0x15A521632121A534),
+	SPH_C64(0x94D69CB99C9CD608), SPH_C64(0xF0661E221E1E66EE),
+	SPH_C64(0x225243C543435261), SPH_C64(0x76FCC754C7C7FCB1),
+	SPH_C64(0xB32BFC19FCFC2B4F), SPH_C64(0x2014040C04041424),
+	SPH_C64(0xB20851F3515108E3), SPH_C64(0xBCC799B69999C725),
+	SPH_C64(0x4FC46DB76D6DC422), SPH_C64(0x68390D170D0D3965),
+	SPH_C64(0x8335FA13FAFA3579), SPH_C64(0xB684DF7CDFDF8469),
+	SPH_C64(0xD79B7E827E7E9BA9), SPH_C64(0x3DB4246C2424B419),
+	SPH_C64(0xC5D73B4D3B3BD7FE), SPH_C64(0x313DABE0ABAB3D9A),
+	SPH_C64(0x3ED1CE4FCECED1F0), SPH_C64(0x8855113311115599),
+	SPH_C64(0x0C898F8C8F8F8983), SPH_C64(0x4A6B4ED24E4E6B04),
+	SPH_C64(0xD151B7C4B7B75166), SPH_C64(0x0B60EB20EBEB60E0),
+	SPH_C64(0xFDCC3C443C3CCCC1), SPH_C64(0x7CBF819E8181BFFD),
+	SPH_C64(0xD4FE94A19494FE40), SPH_C64(0xEB0CF704F7F70C1C),
+	SPH_C64(0xA167B9D6B9B96718), SPH_C64(0x985F133513135F8B),
+	SPH_C64(0x7D9C2C742C2C9C51), SPH_C64(0xD6B8D368D3D3B805),
+	SPH_C64(0x6B5CE734E7E75C8C), SPH_C64(0x57CB6EB26E6ECB39),
+	SPH_C64(0x6EF3C451C4C4F3AA), SPH_C64(0x180F030503030F1B),
+	SPH_C64(0x8A1356FA565613DC), SPH_C64(0x1A4944CC4444495E),
+	SPH_C64(0xDF9E7F817F7F9EA0), SPH_C64(0x2137A9E6A9A93788),
+	SPH_C64(0x4D822A7E2A2A8267), SPH_C64(0xB16DBBD0BBBB6D0A),
+	SPH_C64(0x46E2C15EC1C1E287), SPH_C64(0xA20253F5535302F1),
+	SPH_C64(0xAE8BDC79DCDC8B72), SPH_C64(0x58270B1D0B0B2753),
+	SPH_C64(0x9CD39DBA9D9DD301), SPH_C64(0x47C16CB46C6CC12B),
+	SPH_C64(0x95F531533131F5A4), SPH_C64(0x87B9749C7474B9F3),
+	SPH_C64(0xE309F607F6F60915), SPH_C64(0x0A4346CA4646434C),
+	SPH_C64(0x0926ACE9ACAC26A5), SPH_C64(0x3C978986898997B5),
+	SPH_C64(0xA044143C141444B4), SPH_C64(0x5B42E13EE1E142BA),
+	SPH_C64(0xB04E163A16164EA6), SPH_C64(0xCDD23A4E3A3AD2F7),
+	SPH_C64(0x6FD069BB6969D006), SPH_C64(0x482D091B09092D41),
+	SPH_C64(0xA7AD70907070ADD7), SPH_C64(0xD954B6C7B6B6546F),
+	SPH_C64(0xCEB7D06DD0D0B71E), SPH_C64(0x3B7EED2AEDED7ED6),
+	SPH_C64(0x2EDBCC49CCCCDBE2), SPH_C64(0x2A5742C642425768),
+	SPH_C64(0xB4C298B59898C22C), SPH_C64(0x490EA4F1A4A40EED),
+	SPH_C64(0x5D88287828288875), SPH_C64(0xDA315CE45C5C3186),
+	SPH_C64(0x933FF815F8F83F6B), SPH_C64(0x44A486978686A4C2)
+};
+
+static const sph_u64 old1_T3[256] = {	/* 256 entries; spot-checked entries equal the matching old1_T0 entry rotated left by 24 bits (0x78D8C07818281818 -> 0x781828181878D8C0). Compiled only when SPH_SMALL_FOOTPRINT_WHIRLPOOL is false. */
+	SPH_C64(0x781828181878D8C0), SPH_C64(0xAF23652323AF2605),
+	SPH_C64(0xF9C657C6C6F9B87E), SPH_C64(0x6FE825E8E86FFB13),
+	SPH_C64(0xA187948787A1CB4C), SPH_C64(0x62B8D5B8B86211A9),
+	SPH_C64(0x0501030101050908), SPH_C64(0x6E4FD14F4F6E0D42),
+	SPH_C64(0xEE365A3636EE9BAD), SPH_C64(0x04A6F7A6A604FF59),
+	SPH_C64(0xBDD26BD2D2BD0CDE), SPH_C64(0x06F502F5F5060EFB),
+	SPH_C64(0x80798B79798096EF), SPH_C64(0xCE6FB16F6FCE305F),
+	SPH_C64(0xEF91AE9191EF6DFC), SPH_C64(0x0752F6525207F8AA),
+	SPH_C64(0xFD60A06060FD4727), SPH_C64(0x76BCD9BCBC763589),
+	SPH_C64(0xCD9BB09B9BCD37AC), SPH_C64(0x8C8E8F8E8E8C8A04),
+	SPH_C64(0x15A3F8A3A315D271), SPH_C64(0x3C0C140C0C3C6C60),
+	SPH_C64(0x8A7B8D7B7B8A84FF), SPH_C64(0xE1355F3535E180B5),
+	SPH_C64(0x691D271D1D69F5E8), SPH_C64(0x47E03DE0E047B353),
+	SPH_C64(0xACD764D7D7AC21F6), SPH_C64(0xEDC25BC2C2ED9C5E),
+	SPH_C64(0x962E722E2E96436D), SPH_C64(0x7A4BDD4B4B7A2962),
+	SPH_C64(0x21FE1FFEFE215DA3), SPH_C64(0x1657F9575716D582),
+	SPH_C64(0x41153F151541BDA8), SPH_C64(0xB677997777B6E89F),
+	SPH_C64(0xEB37593737EB92A5), SPH_C64(0x56E532E5E5569E7B),
+	SPH_C64(0xD99FBC9F9FD9138C), SPH_C64(0x17F00DF0F01723D3),
+	SPH_C64(0x7F4ADE4A4A7F206A), SPH_C64(0x95DA73DADA95449E),
+	SPH_C64(0x2558E8585825A2FA), SPH_C64(0xCAC946C9C9CACF06),
+	SPH_C64(0x8D297B29298D7C55), SPH_C64(0x220A1E0A0A225A50),
+	SPH_C64(0x4FB1CEB1B14F50E1), SPH_C64(0x1AA0FDA0A01AC969),
+	SPH_C64(0xDA6BBD6B6BDA147F), SPH_C64(0xAB85928585ABD95C),
+	SPH_C64(0x73BDDABDBD733C81), SPH_C64(0x345DE75D5D348FD2),
+	SPH_C64(0x5010301010509080), SPH_C64(0x03F401F4F40307F3),
+	SPH_C64(0xC0CB40CBCBC0DD16), SPH_C64(0xC63E423E3EC6D3ED),
+	SPH_C64(0x11050F0505112D28), SPH_C64(0xE667A96767E6781F),
+	SPH_C64(0x53E431E4E4539773), SPH_C64(0xBB27692727BB0225),
+	SPH_C64(0x5841C34141587332), SPH_C64(0x9D8B808B8B9DA72C),
+	SPH_C64(0x01A7F4A7A701F651), SPH_C64(0x947D877D7D94B2CF),
+	SPH_C64(0xFB95A29595FB49DC), SPH_C64(0x9FD875D8D89F568E),
+	SPH_C64(0x30FB10FBFB30708B), SPH_C64(0x71EE2FEEEE71CD23),
+	SPH_C64(0x917C847C7C91BBC7), SPH_C64(0xE366AA6666E37117),
+	SPH_C64(0x8EDD7ADDDD8E7BA6), SPH_C64(0x4B173917174BAFB8),
+	SPH_C64(0x4647C94747464502), SPH_C64(0xDC9EBF9E9EDC1A84),
+	SPH_C64(0xC5CA43CACAC5D41E), SPH_C64(0x992D772D2D995875),
+	SPH_C64(0x79BFDCBFBF792E91), SPH_C64(0x1B070907071B3F38),
+	SPH_C64(0x23ADEAADAD23AC01), SPH_C64(0x2F5AEE5A5A2FB0EA),
+	SPH_C64(0xB583988383B5EF6C), SPH_C64(0xFF33553333FFB685),
+	SPH_C64(0xF263A56363F25C3F), SPH_C64(0x0A020602020A1210),
+	SPH_C64(0x38AAE3AAAA389339), SPH_C64(0xA871937171A8DEAF),
+	SPH_C64(0xCFC845C8C8CFC60E), SPH_C64(0x7D192B19197DD1C8),
+	SPH_C64(0x7049DB4949703B72), SPH_C64(0x9AD976D9D99A5F86),
+	SPH_C64(0x1DF20BF2F21D31C3), SPH_C64(0x48E338E3E348A84B),
+	SPH_C64(0x2A5BED5B5B2AB9E2), SPH_C64(0x928885888892BC34),
+	SPH_C64(0xC89AB39A9AC83EA4), SPH_C64(0xBE266A2626BE0B2D),
+	SPH_C64(0xFA32563232FABF8D), SPH_C64(0x4AB0CDB0B04A59E9),
+	SPH_C64(0x6AE926E9E96AF21B), SPH_C64(0x330F110F0F337778),
+	SPH_C64(0xA6D562D5D5A633E6), SPH_C64(0xBA809D8080BAF474),
+	SPH_C64(0x7CBEDFBEBE7C2799), SPH_C64(0xDECD4ACDCDDEEB26),
+	SPH_C64(0xE4345C3434E489BD), SPH_C64(0x7548D8484875327A),
+	SPH_C64(0x24FF1CFFFF2454AB), SPH_C64(0x8F7A8E7A7A8F8DF7),
+	SPH_C64(0xEA90AD9090EA64F4), SPH_C64(0x3E5FE15F5F3E9DC2),
+	SPH_C64(0xA020602020A03D1D), SPH_C64(0xD568B86868D50F67),
+	SPH_C64(0x721A2E1A1A72CAD0), SPH_C64(0x2CAEEFAEAE2CB719),
+	SPH_C64(0x5EB4C1B4B45E7DC9), SPH_C64(0x1954FC545419CE9A),
+	SPH_C64(0xE593A89393E57FEC), SPH_C64(0xAA22662222AA2F0D),
+	SPH_C64(0xE964AC6464E96307), SPH_C64(0x12F10EF1F1122ADB),
+	SPH_C64(0xA273957373A2CCBF), SPH_C64(0x5A123612125A8290),
+	SPH_C64(0x5D40C040405D7A3A), SPH_C64(0x2808180808284840),
+	SPH_C64(0xE8C358C3C3E89556), SPH_C64(0x7BEC29ECEC7BDF33),
+	SPH_C64(0x90DB70DBDB904D96), SPH_C64(0x1FA1FEA1A11FC061),
+	SPH_C64(0x838D8A8D8D83911C), SPH_C64(0xC93D473D3DC9C8F5),
+	SPH_C64(0xF197A49797F15BCC), SPH_C64(0x0000000000000000),
+	SPH_C64(0xD4CF4CCFCFD4F936), SPH_C64(0x872B7D2B2B876E45),
+	SPH_C64(0xB3769A7676B3E197), SPH_C64(0xB0829B8282B0E664),
+	SPH_C64(0xA9D667D6D6A928FE), SPH_C64(0x771B2D1B1B77C3D8),
+	SPH_C64(0x5BB5C2B5B55B74C1), SPH_C64(0x29AFECAFAF29BE11),
+	SPH_C64(0xDF6ABE6A6ADF1D77), SPH_C64(0x0D50F050500DEABA),
+	SPH_C64(0x4C45CF45454C5712), SPH_C64(0x18F308F3F31838CB),
+	SPH_C64(0xF030503030F0AD9D), SPH_C64(0x74EF2CEFEF74C42B),
+	SPH_C64(0xC33F413F3FC3DAE5), SPH_C64(0x1C55FF55551CC792),
+	SPH_C64(0x10A2FBA2A210DB79), SPH_C64(0x65EA23EAEA65E903),
+	SPH_C64(0xEC65AF6565EC6A0F), SPH_C64(0x68BAD3BABA6803B9),
+	SPH_C64(0x932F712F2F934A65), SPH_C64(0xE7C05DC0C0E78E4E),
+	SPH_C64(0x81DE7FDEDE8160BE), SPH_C64(0x6C1C241C1C6CFCE0),
+	SPH_C64(0x2EFD1AFDFD2E46BB), SPH_C64(0x644DD74D4D641F52),
+	SPH_C64(0xE092AB9292E076E4), SPH_C64(0xBC759F7575BCFA8F),
+	SPH_C64(0x1E060A06061E3630), SPH_C64(0x988A838A8A98AE24),
+	SPH_C64(0x40B2CBB2B2404BF9), SPH_C64(0x59E637E6E6598563),
+	SPH_C64(0x360E120E0E367E70), SPH_C64(0x631F211F1F63E7F8),
+	SPH_C64(0xF762A66262F75537), SPH_C64(0xA3D461D4D4A33AEE),
+	SPH_C64(0x32A8E5A8A8328129), SPH_C64(0xF496A79696F452C4),
+	SPH_C64(0x3AF916F9F93A629B), SPH_C64(0xF6C552C5C5F6A366),
+	SPH_C64(0xB1256F2525B11035), SPH_C64(0x2059EB595920ABF2),
+	SPH_C64(0xAE84918484AED054), SPH_C64(0xA772967272A7C5B7),
+	SPH_C64(0xDD394B3939DDECD5), SPH_C64(0x614CD44C4C61165A),
+	SPH_C64(0x3B5EE25E5E3B94CA), SPH_C64(0x8578887878859FE7),
+	SPH_C64(0xD838483838D8E5DD), SPH_C64(0x868C898C8C869814),
+	SPH_C64(0xB2D16ED1D1B217C6), SPH_C64(0x0BA5F2A5A50BE441),
+	SPH_C64(0x4DE23BE2E24DA143), SPH_C64(0xF861A36161F84E2F),
+	SPH_C64(0x45B3C8B3B34542F1), SPH_C64(0xA521632121A53415),
+	SPH_C64(0xD69CB99C9CD60894), SPH_C64(0x661E221E1E66EEF0),
+	SPH_C64(0x5243C54343526122), SPH_C64(0xFCC754C7C7FCB176),
+	SPH_C64(0x2BFC19FCFC2B4FB3), SPH_C64(0x14040C0404142420),
+	SPH_C64(0x0851F3515108E3B2), SPH_C64(0xC799B69999C725BC),
+	SPH_C64(0xC46DB76D6DC4224F), SPH_C64(0x390D170D0D396568),
+	SPH_C64(0x35FA13FAFA357983), SPH_C64(0x84DF7CDFDF8469B6),
+	SPH_C64(0x9B7E827E7E9BA9D7), SPH_C64(0xB4246C2424B4193D),
+	SPH_C64(0xD73B4D3B3BD7FEC5), SPH_C64(0x3DABE0ABAB3D9A31),
+	SPH_C64(0xD1CE4FCECED1F03E), SPH_C64(0x5511331111559988),
+	SPH_C64(0x898F8C8F8F89830C), SPH_C64(0x6B4ED24E4E6B044A),
+	SPH_C64(0x51B7C4B7B75166D1), SPH_C64(0x60EB20EBEB60E00B),
+	SPH_C64(0xCC3C443C3CCCC1FD), SPH_C64(0xBF819E8181BFFD7C),
+	SPH_C64(0xFE94A19494FE40D4), SPH_C64(0x0CF704F7F70C1CEB),
+	SPH_C64(0x67B9D6B9B96718A1), SPH_C64(0x5F133513135F8B98),
+	SPH_C64(0x9C2C742C2C9C517D), SPH_C64(0xB8D368D3D3B805D6),
+	SPH_C64(0x5CE734E7E75C8C6B), SPH_C64(0xCB6EB26E6ECB3957),
+	SPH_C64(0xF3C451C4C4F3AA6E), SPH_C64(0x0F030503030F1B18),
+	SPH_C64(0x1356FA565613DC8A), SPH_C64(0x4944CC4444495E1A),
+	SPH_C64(0x9E7F817F7F9EA0DF), SPH_C64(0x37A9E6A9A9378821),
+	SPH_C64(0x822A7E2A2A82674D), SPH_C64(0x6DBBD0BBBB6D0AB1),
+	SPH_C64(0xE2C15EC1C1E28746), SPH_C64(0x0253F5535302F1A2),
+	SPH_C64(0x8BDC79DCDC8B72AE), SPH_C64(0x270B1D0B0B275358),
+	SPH_C64(0xD39DBA9D9DD3019C), SPH_C64(0xC16CB46C6CC12B47),
+	SPH_C64(0xF531533131F5A495), SPH_C64(0xB9749C7474B9F387),
+	SPH_C64(0x09F607F6F60915E3), SPH_C64(0x4346CA4646434C0A),
+	SPH_C64(0x26ACE9ACAC26A509), SPH_C64(0x978986898997B53C),
+	SPH_C64(0x44143C141444B4A0), SPH_C64(0x42E13EE1E142BA5B),
+	SPH_C64(0x4E163A16164EA6B0), SPH_C64(0xD23A4E3A3AD2F7CD),
+	SPH_C64(0xD069BB6969D0066F), SPH_C64(0x2D091B09092D4148),
+	SPH_C64(0xAD70907070ADD7A7), SPH_C64(0x54B6C7B6B6546FD9),
+	SPH_C64(0xB7D06DD0D0B71ECE), SPH_C64(0x7EED2AEDED7ED63B),
+	SPH_C64(0xDBCC49CCCCDBE22E), SPH_C64(0x5742C6424257682A),
+	SPH_C64(0xC298B59898C22CB4), SPH_C64(0x0EA4F1A4A40EED49),
+	SPH_C64(0x882878282888755D), SPH_C64(0x315CE45C5C3186DA),
+	SPH_C64(0x3FF815F8F83F6B93), SPH_C64(0xA486978686A4C244)
+};
+
+static const sph_u64 old1_T4[256] = {
+	SPH_C64(0x1828181878D8C078), SPH_C64(0x23652323AF2605AF),
+	SPH_C64(0xC657C6C6F9B87EF9), SPH_C64(0xE825E8E86FFB136F),
+	SPH_C64(0x87948787A1CB4CA1), SPH_C64(0xB8D5B8B86211A962),
+	SPH_C64(0x0103010105090805), SPH_C64(0x4FD14F4F6E0D426E),
+	SPH_C64(0x365A3636EE9BADEE), SPH_C64(0xA6F7A6A604FF5904),
+	SPH_C64(0xD26BD2D2BD0CDEBD), SPH_C64(0xF502F5F5060EFB06),
+	SPH_C64(0x798B79798096EF80), SPH_C64(0x6FB16F6FCE305FCE),
+	SPH_C64(0x91AE9191EF6DFCEF), SPH_C64(0x52F6525207F8AA07),
+	SPH_C64(0x60A06060FD4727FD), SPH_C64(0xBCD9BCBC76358976),
+	SPH_C64(0x9BB09B9BCD37ACCD), SPH_C64(0x8E8F8E8E8C8A048C),
+	SPH_C64(0xA3F8A3A315D27115), SPH_C64(0x0C140C0C3C6C603C),
+	SPH_C64(0x7B8D7B7B8A84FF8A), SPH_C64(0x355F3535E180B5E1),
+	SPH_C64(0x1D271D1D69F5E869), SPH_C64(0xE03DE0E047B35347),
+	SPH_C64(0xD764D7D7AC21F6AC), SPH_C64(0xC25BC2C2ED9C5EED),
+	SPH_C64(0x2E722E2E96436D96), SPH_C64(0x4BDD4B4B7A29627A),
+	SPH_C64(0xFE1FFEFE215DA321), SPH_C64(0x57F9575716D58216),
+	SPH_C64(0x153F151541BDA841), SPH_C64(0x77997777B6E89FB6),
+	SPH_C64(0x37593737EB92A5EB), SPH_C64(0xE532E5E5569E7B56),
+	SPH_C64(0x9FBC9F9FD9138CD9), SPH_C64(0xF00DF0F01723D317),
+	SPH_C64(0x4ADE4A4A7F206A7F), SPH_C64(0xDA73DADA95449E95),
+	SPH_C64(0x58E8585825A2FA25), SPH_C64(0xC946C9C9CACF06CA),
+	SPH_C64(0x297B29298D7C558D), SPH_C64(0x0A1E0A0A225A5022),
+	SPH_C64(0xB1CEB1B14F50E14F), SPH_C64(0xA0FDA0A01AC9691A),
+	SPH_C64(0x6BBD6B6BDA147FDA), SPH_C64(0x85928585ABD95CAB),
+	SPH_C64(0xBDDABDBD733C8173), SPH_C64(0x5DE75D5D348FD234),
+	SPH_C64(0x1030101050908050), SPH_C64(0xF401F4F40307F303),
+	SPH_C64(0xCB40CBCBC0DD16C0), SPH_C64(0x3E423E3EC6D3EDC6),
+	SPH_C64(0x050F0505112D2811), SPH_C64(0x67A96767E6781FE6),
+	SPH_C64(0xE431E4E453977353), SPH_C64(0x27692727BB0225BB),
+	SPH_C64(0x41C3414158733258), SPH_C64(0x8B808B8B9DA72C9D),
+	SPH_C64(0xA7F4A7A701F65101), SPH_C64(0x7D877D7D94B2CF94),
+	SPH_C64(0x95A29595FB49DCFB), SPH_C64(0xD875D8D89F568E9F),
+	SPH_C64(0xFB10FBFB30708B30), SPH_C64(0xEE2FEEEE71CD2371),
+	SPH_C64(0x7C847C7C91BBC791), SPH_C64(0x66AA6666E37117E3),
+	SPH_C64(0xDD7ADDDD8E7BA68E), SPH_C64(0x173917174BAFB84B),
+	SPH_C64(0x47C9474746450246), SPH_C64(0x9EBF9E9EDC1A84DC),
+	SPH_C64(0xCA43CACAC5D41EC5), SPH_C64(0x2D772D2D99587599),
+	SPH_C64(0xBFDCBFBF792E9179), SPH_C64(0x070907071B3F381B),
+	SPH_C64(0xADEAADAD23AC0123), SPH_C64(0x5AEE5A5A2FB0EA2F),
+	SPH_C64(0x83988383B5EF6CB5), SPH_C64(0x33553333FFB685FF),
+	SPH_C64(0x63A56363F25C3FF2), SPH_C64(0x020602020A12100A),
+	SPH_C64(0xAAE3AAAA38933938), SPH_C64(0x71937171A8DEAFA8),
+	SPH_C64(0xC845C8C8CFC60ECF), SPH_C64(0x192B19197DD1C87D),
+	SPH_C64(0x49DB4949703B7270), SPH_C64(0xD976D9D99A5F869A),
+	SPH_C64(0xF20BF2F21D31C31D), SPH_C64(0xE338E3E348A84B48),
+	SPH_C64(0x5BED5B5B2AB9E22A), SPH_C64(0x8885888892BC3492),
+	SPH_C64(0x9AB39A9AC83EA4C8), SPH_C64(0x266A2626BE0B2DBE),
+	SPH_C64(0x32563232FABF8DFA), SPH_C64(0xB0CDB0B04A59E94A),
+	SPH_C64(0xE926E9E96AF21B6A), SPH_C64(0x0F110F0F33777833),
+	SPH_C64(0xD562D5D5A633E6A6), SPH_C64(0x809D8080BAF474BA),
+	SPH_C64(0xBEDFBEBE7C27997C), SPH_C64(0xCD4ACDCDDEEB26DE),
+	SPH_C64(0x345C3434E489BDE4), SPH_C64(0x48D8484875327A75),
+	SPH_C64(0xFF1CFFFF2454AB24), SPH_C64(0x7A8E7A7A8F8DF78F),
+	SPH_C64(0x90AD9090EA64F4EA), SPH_C64(0x5FE15F5F3E9DC23E),
+	SPH_C64(0x20602020A03D1DA0), SPH_C64(0x68B86868D50F67D5),
+	SPH_C64(0x1A2E1A1A72CAD072), SPH_C64(0xAEEFAEAE2CB7192C),
+	SPH_C64(0xB4C1B4B45E7DC95E), SPH_C64(0x54FC545419CE9A19),
+	SPH_C64(0x93A89393E57FECE5), SPH_C64(0x22662222AA2F0DAA),
+	SPH_C64(0x64AC6464E96307E9), SPH_C64(0xF10EF1F1122ADB12),
+	SPH_C64(0x73957373A2CCBFA2), SPH_C64(0x123612125A82905A),
+	SPH_C64(0x40C040405D7A3A5D), SPH_C64(0x0818080828484028),
+	SPH_C64(0xC358C3C3E89556E8), SPH_C64(0xEC29ECEC7BDF337B),
+	SPH_C64(0xDB70DBDB904D9690), SPH_C64(0xA1FEA1A11FC0611F),
+	SPH_C64(0x8D8A8D8D83911C83), SPH_C64(0x3D473D3DC9C8F5C9),
+	SPH_C64(0x97A49797F15BCCF1), SPH_C64(0x0000000000000000),
+	SPH_C64(0xCF4CCFCFD4F936D4), SPH_C64(0x2B7D2B2B876E4587),
+	SPH_C64(0x769A7676B3E197B3), SPH_C64(0x829B8282B0E664B0),
+	SPH_C64(0xD667D6D6A928FEA9), SPH_C64(0x1B2D1B1B77C3D877),
+	SPH_C64(0xB5C2B5B55B74C15B), SPH_C64(0xAFECAFAF29BE1129),
+	SPH_C64(0x6ABE6A6ADF1D77DF), SPH_C64(0x50F050500DEABA0D),
+	SPH_C64(0x45CF45454C57124C), SPH_C64(0xF308F3F31838CB18),
+	SPH_C64(0x30503030F0AD9DF0), SPH_C64(0xEF2CEFEF74C42B74),
+	SPH_C64(0x3F413F3FC3DAE5C3), SPH_C64(0x55FF55551CC7921C),
+	SPH_C64(0xA2FBA2A210DB7910), SPH_C64(0xEA23EAEA65E90365),
+	SPH_C64(0x65AF6565EC6A0FEC), SPH_C64(0xBAD3BABA6803B968),
+	SPH_C64(0x2F712F2F934A6593), SPH_C64(0xC05DC0C0E78E4EE7),
+	SPH_C64(0xDE7FDEDE8160BE81), SPH_C64(0x1C241C1C6CFCE06C),
+	SPH_C64(0xFD1AFDFD2E46BB2E), SPH_C64(0x4DD74D4D641F5264),
+	SPH_C64(0x92AB9292E076E4E0), SPH_C64(0x759F7575BCFA8FBC),
+	SPH_C64(0x060A06061E36301E), SPH_C64(0x8A838A8A98AE2498),
+	SPH_C64(0xB2CBB2B2404BF940), SPH_C64(0xE637E6E659856359),
+	SPH_C64(0x0E120E0E367E7036), SPH_C64(0x1F211F1F63E7F863),
+	SPH_C64(0x62A66262F75537F7), SPH_C64(0xD461D4D4A33AEEA3),
+	SPH_C64(0xA8E5A8A832812932), SPH_C64(0x96A79696F452C4F4),
+	SPH_C64(0xF916F9F93A629B3A), SPH_C64(0xC552C5C5F6A366F6),
+	SPH_C64(0x256F2525B11035B1), SPH_C64(0x59EB595920ABF220),
+	SPH_C64(0x84918484AED054AE), SPH_C64(0x72967272A7C5B7A7),
+	SPH_C64(0x394B3939DDECD5DD), SPH_C64(0x4CD44C4C61165A61),
+	SPH_C64(0x5EE25E5E3B94CA3B), SPH_C64(0x78887878859FE785),
+	SPH_C64(0x38483838D8E5DDD8), SPH_C64(0x8C898C8C86981486),
+	SPH_C64(0xD16ED1D1B217C6B2), SPH_C64(0xA5F2A5A50BE4410B),
+	SPH_C64(0xE23BE2E24DA1434D), SPH_C64(0x61A36161F84E2FF8),
+	SPH_C64(0xB3C8B3B34542F145), SPH_C64(0x21632121A53415A5),
+	SPH_C64(0x9CB99C9CD60894D6), SPH_C64(0x1E221E1E66EEF066),
+	SPH_C64(0x43C5434352612252), SPH_C64(0xC754C7C7FCB176FC),
+	SPH_C64(0xFC19FCFC2B4FB32B), SPH_C64(0x040C040414242014),
+	SPH_C64(0x51F3515108E3B208), SPH_C64(0x99B69999C725BCC7),
+	SPH_C64(0x6DB76D6DC4224FC4), SPH_C64(0x0D170D0D39656839),
+	SPH_C64(0xFA13FAFA35798335), SPH_C64(0xDF7CDFDF8469B684),
+	SPH_C64(0x7E827E7E9BA9D79B), SPH_C64(0x246C2424B4193DB4),
+	SPH_C64(0x3B4D3B3BD7FEC5D7), SPH_C64(0xABE0ABAB3D9A313D),
+	SPH_C64(0xCE4FCECED1F03ED1), SPH_C64(0x1133111155998855),
+	SPH_C64(0x8F8C8F8F89830C89), SPH_C64(0x4ED24E4E6B044A6B),
+	SPH_C64(0xB7C4B7B75166D151), SPH_C64(0xEB20EBEB60E00B60),
+	SPH_C64(0x3C443C3CCCC1FDCC), SPH_C64(0x819E8181BFFD7CBF),
+	SPH_C64(0x94A19494FE40D4FE), SPH_C64(0xF704F7F70C1CEB0C),
+	SPH_C64(0xB9D6B9B96718A167), SPH_C64(0x133513135F8B985F),
+	SPH_C64(0x2C742C2C9C517D9C), SPH_C64(0xD368D3D3B805D6B8),
+	SPH_C64(0xE734E7E75C8C6B5C), SPH_C64(0x6EB26E6ECB3957CB),
+	SPH_C64(0xC451C4C4F3AA6EF3), SPH_C64(0x030503030F1B180F),
+	SPH_C64(0x56FA565613DC8A13), SPH_C64(0x44CC4444495E1A49),
+	SPH_C64(0x7F817F7F9EA0DF9E), SPH_C64(0xA9E6A9A937882137),
+	SPH_C64(0x2A7E2A2A82674D82), SPH_C64(0xBBD0BBBB6D0AB16D),
+	SPH_C64(0xC15EC1C1E28746E2), SPH_C64(0x53F5535302F1A202),
+	SPH_C64(0xDC79DCDC8B72AE8B), SPH_C64(0x0B1D0B0B27535827),
+	SPH_C64(0x9DBA9D9DD3019CD3), SPH_C64(0x6CB46C6CC12B47C1),
+	SPH_C64(0x31533131F5A495F5), SPH_C64(0x749C7474B9F387B9),
+	SPH_C64(0xF607F6F60915E309), SPH_C64(0x46CA4646434C0A43),
+	SPH_C64(0xACE9ACAC26A50926), SPH_C64(0x8986898997B53C97),
+	SPH_C64(0x143C141444B4A044), SPH_C64(0xE13EE1E142BA5B42),
+	SPH_C64(0x163A16164EA6B04E), SPH_C64(0x3A4E3A3AD2F7CDD2),
+	SPH_C64(0x69BB6969D0066FD0), SPH_C64(0x091B09092D41482D),
+	SPH_C64(0x70907070ADD7A7AD), SPH_C64(0xB6C7B6B6546FD954),
+	SPH_C64(0xD06DD0D0B71ECEB7), SPH_C64(0xED2AEDED7ED63B7E),
+	SPH_C64(0xCC49CCCCDBE22EDB), SPH_C64(0x42C6424257682A57),
+	SPH_C64(0x98B59898C22CB4C2), SPH_C64(0xA4F1A4A40EED490E),
+	SPH_C64(0x2878282888755D88), SPH_C64(0x5CE45C5C3186DA31),
+	SPH_C64(0xF815F8F83F6B933F), SPH_C64(0x86978686A4C244A4)
+};
+
+static const sph_u64 old1_T5[256] = {
+	SPH_C64(0x28181878D8C07818), SPH_C64(0x652323AF2605AF23),
+	SPH_C64(0x57C6C6F9B87EF9C6), SPH_C64(0x25E8E86FFB136FE8),
+	SPH_C64(0x948787A1CB4CA187), SPH_C64(0xD5B8B86211A962B8),
+	SPH_C64(0x0301010509080501), SPH_C64(0xD14F4F6E0D426E4F),
+	SPH_C64(0x5A3636EE9BADEE36), SPH_C64(0xF7A6A604FF5904A6),
+	SPH_C64(0x6BD2D2BD0CDEBDD2), SPH_C64(0x02F5F5060EFB06F5),
+	SPH_C64(0x8B79798096EF8079), SPH_C64(0xB16F6FCE305FCE6F),
+	SPH_C64(0xAE9191EF6DFCEF91), SPH_C64(0xF6525207F8AA0752),
+	SPH_C64(0xA06060FD4727FD60), SPH_C64(0xD9BCBC76358976BC),
+	SPH_C64(0xB09B9BCD37ACCD9B), SPH_C64(0x8F8E8E8C8A048C8E),
+	SPH_C64(0xF8A3A315D27115A3), SPH_C64(0x140C0C3C6C603C0C),
+	SPH_C64(0x8D7B7B8A84FF8A7B), SPH_C64(0x5F3535E180B5E135),
+	SPH_C64(0x271D1D69F5E8691D), SPH_C64(0x3DE0E047B35347E0),
+	SPH_C64(0x64D7D7AC21F6ACD7), SPH_C64(0x5BC2C2ED9C5EEDC2),
+	SPH_C64(0x722E2E96436D962E), SPH_C64(0xDD4B4B7A29627A4B),
+	SPH_C64(0x1FFEFE215DA321FE), SPH_C64(0xF9575716D5821657),
+	SPH_C64(0x3F151541BDA84115), SPH_C64(0x997777B6E89FB677),
+	SPH_C64(0x593737EB92A5EB37), SPH_C64(0x32E5E5569E7B56E5),
+	SPH_C64(0xBC9F9FD9138CD99F), SPH_C64(0x0DF0F01723D317F0),
+	SPH_C64(0xDE4A4A7F206A7F4A), SPH_C64(0x73DADA95449E95DA),
+	SPH_C64(0xE8585825A2FA2558), SPH_C64(0x46C9C9CACF06CAC9),
+	SPH_C64(0x7B29298D7C558D29), SPH_C64(0x1E0A0A225A50220A),
+	SPH_C64(0xCEB1B14F50E14FB1), SPH_C64(0xFDA0A01AC9691AA0),
+	SPH_C64(0xBD6B6BDA147FDA6B), SPH_C64(0x928585ABD95CAB85),
+	SPH_C64(0xDABDBD733C8173BD), SPH_C64(0xE75D5D348FD2345D),
+	SPH_C64(0x3010105090805010), SPH_C64(0x01F4F40307F303F4),
+	SPH_C64(0x40CBCBC0DD16C0CB), SPH_C64(0x423E3EC6D3EDC63E),
+	SPH_C64(0x0F0505112D281105), SPH_C64(0xA96767E6781FE667),
+	SPH_C64(0x31E4E453977353E4), SPH_C64(0x692727BB0225BB27),
+	SPH_C64(0xC341415873325841), SPH_C64(0x808B8B9DA72C9D8B),
+	SPH_C64(0xF4A7A701F65101A7), SPH_C64(0x877D7D94B2CF947D),
+	SPH_C64(0xA29595FB49DCFB95), SPH_C64(0x75D8D89F568E9FD8),
+	SPH_C64(0x10FBFB30708B30FB), SPH_C64(0x2FEEEE71CD2371EE),
+	SPH_C64(0x847C7C91BBC7917C), SPH_C64(0xAA6666E37117E366),
+	SPH_C64(0x7ADDDD8E7BA68EDD), SPH_C64(0x3917174BAFB84B17),
+	SPH_C64(0xC947474645024647), SPH_C64(0xBF9E9EDC1A84DC9E),
+	SPH_C64(0x43CACAC5D41EC5CA), SPH_C64(0x772D2D995875992D),
+	SPH_C64(0xDCBFBF792E9179BF), SPH_C64(0x0907071B3F381B07),
+	SPH_C64(0xEAADAD23AC0123AD), SPH_C64(0xEE5A5A2FB0EA2F5A),
+	SPH_C64(0x988383B5EF6CB583), SPH_C64(0x553333FFB685FF33),
+	SPH_C64(0xA56363F25C3FF263), SPH_C64(0x0602020A12100A02),
+	SPH_C64(0xE3AAAA38933938AA), SPH_C64(0x937171A8DEAFA871),
+	SPH_C64(0x45C8C8CFC60ECFC8), SPH_C64(0x2B19197DD1C87D19),
+	SPH_C64(0xDB4949703B727049), SPH_C64(0x76D9D99A5F869AD9),
+	SPH_C64(0x0BF2F21D31C31DF2), SPH_C64(0x38E3E348A84B48E3),
+	SPH_C64(0xED5B5B2AB9E22A5B), SPH_C64(0x85888892BC349288),
+	SPH_C64(0xB39A9AC83EA4C89A), SPH_C64(0x6A2626BE0B2DBE26),
+	SPH_C64(0x563232FABF8DFA32), SPH_C64(0xCDB0B04A59E94AB0),
+	SPH_C64(0x26E9E96AF21B6AE9), SPH_C64(0x110F0F337778330F),
+	SPH_C64(0x62D5D5A633E6A6D5), SPH_C64(0x9D8080BAF474BA80),
+	SPH_C64(0xDFBEBE7C27997CBE), SPH_C64(0x4ACDCDDEEB26DECD),
+	SPH_C64(0x5C3434E489BDE434), SPH_C64(0xD8484875327A7548),
+	SPH_C64(0x1CFFFF2454AB24FF), SPH_C64(0x8E7A7A8F8DF78F7A),
+	SPH_C64(0xAD9090EA64F4EA90), SPH_C64(0xE15F5F3E9DC23E5F),
+	SPH_C64(0x602020A03D1DA020), SPH_C64(0xB86868D50F67D568),
+	SPH_C64(0x2E1A1A72CAD0721A), SPH_C64(0xEFAEAE2CB7192CAE),
+	SPH_C64(0xC1B4B45E7DC95EB4), SPH_C64(0xFC545419CE9A1954),
+	SPH_C64(0xA89393E57FECE593), SPH_C64(0x662222AA2F0DAA22),
+	SPH_C64(0xAC6464E96307E964), SPH_C64(0x0EF1F1122ADB12F1),
+	SPH_C64(0x957373A2CCBFA273), SPH_C64(0x3612125A82905A12),
+	SPH_C64(0xC040405D7A3A5D40), SPH_C64(0x1808082848402808),
+	SPH_C64(0x58C3C3E89556E8C3), SPH_C64(0x29ECEC7BDF337BEC),
+	SPH_C64(0x70DBDB904D9690DB), SPH_C64(0xFEA1A11FC0611FA1),
+	SPH_C64(0x8A8D8D83911C838D), SPH_C64(0x473D3DC9C8F5C93D),
+	SPH_C64(0xA49797F15BCCF197), SPH_C64(0x0000000000000000),
+	SPH_C64(0x4CCFCFD4F936D4CF), SPH_C64(0x7D2B2B876E45872B),
+	SPH_C64(0x9A7676B3E197B376), SPH_C64(0x9B8282B0E664B082),
+	SPH_C64(0x67D6D6A928FEA9D6), SPH_C64(0x2D1B1B77C3D8771B),
+	SPH_C64(0xC2B5B55B74C15BB5), SPH_C64(0xECAFAF29BE1129AF),
+	SPH_C64(0xBE6A6ADF1D77DF6A), SPH_C64(0xF050500DEABA0D50),
+	SPH_C64(0xCF45454C57124C45), SPH_C64(0x08F3F31838CB18F3),
+	SPH_C64(0x503030F0AD9DF030), SPH_C64(0x2CEFEF74C42B74EF),
+	SPH_C64(0x413F3FC3DAE5C33F), SPH_C64(0xFF55551CC7921C55),
+	SPH_C64(0xFBA2A210DB7910A2), SPH_C64(0x23EAEA65E90365EA),
+	SPH_C64(0xAF6565EC6A0FEC65), SPH_C64(0xD3BABA6803B968BA),
+	SPH_C64(0x712F2F934A65932F), SPH_C64(0x5DC0C0E78E4EE7C0),
+	SPH_C64(0x7FDEDE8160BE81DE), SPH_C64(0x241C1C6CFCE06C1C),
+	SPH_C64(0x1AFDFD2E46BB2EFD), SPH_C64(0xD74D4D641F52644D),
+	SPH_C64(0xAB9292E076E4E092), SPH_C64(0x9F7575BCFA8FBC75),
+	SPH_C64(0x0A06061E36301E06), SPH_C64(0x838A8A98AE24988A),
+	SPH_C64(0xCBB2B2404BF940B2), SPH_C64(0x37E6E659856359E6),
+	SPH_C64(0x120E0E367E70360E), SPH_C64(0x211F1F63E7F8631F),
+	SPH_C64(0xA66262F75537F762), SPH_C64(0x61D4D4A33AEEA3D4),
+	SPH_C64(0xE5A8A832812932A8), SPH_C64(0xA79696F452C4F496),
+	SPH_C64(0x16F9F93A629B3AF9), SPH_C64(0x52C5C5F6A366F6C5),
+	SPH_C64(0x6F2525B11035B125), SPH_C64(0xEB595920ABF22059),
+	SPH_C64(0x918484AED054AE84), SPH_C64(0x967272A7C5B7A772),
+	SPH_C64(0x4B3939DDECD5DD39), SPH_C64(0xD44C4C61165A614C),
+	SPH_C64(0xE25E5E3B94CA3B5E), SPH_C64(0x887878859FE78578),
+	SPH_C64(0x483838D8E5DDD838), SPH_C64(0x898C8C869814868C),
+	SPH_C64(0x6ED1D1B217C6B2D1), SPH_C64(0xF2A5A50BE4410BA5),
+	SPH_C64(0x3BE2E24DA1434DE2), SPH_C64(0xA36161F84E2FF861),
+	SPH_C64(0xC8B3B34542F145B3), SPH_C64(0x632121A53415A521),
+	SPH_C64(0xB99C9CD60894D69C), SPH_C64(0x221E1E66EEF0661E),
+	SPH_C64(0xC543435261225243), SPH_C64(0x54C7C7FCB176FCC7),
+	SPH_C64(0x19FCFC2B4FB32BFC), SPH_C64(0x0C04041424201404),
+	SPH_C64(0xF3515108E3B20851), SPH_C64(0xB69999C725BCC799),
+	SPH_C64(0xB76D6DC4224FC46D), SPH_C64(0x170D0D396568390D),
+	SPH_C64(0x13FAFA35798335FA), SPH_C64(0x7CDFDF8469B684DF),
+	SPH_C64(0x827E7E9BA9D79B7E), SPH_C64(0x6C2424B4193DB424),
+	SPH_C64(0x4D3B3BD7FEC5D73B), SPH_C64(0xE0ABAB3D9A313DAB),
+	SPH_C64(0x4FCECED1F03ED1CE), SPH_C64(0x3311115599885511),
+	SPH_C64(0x8C8F8F89830C898F), SPH_C64(0xD24E4E6B044A6B4E),
+	SPH_C64(0xC4B7B75166D151B7), SPH_C64(0x20EBEB60E00B60EB),
+	SPH_C64(0x443C3CCCC1FDCC3C), SPH_C64(0x9E8181BFFD7CBF81),
+	SPH_C64(0xA19494FE40D4FE94), SPH_C64(0x04F7F70C1CEB0CF7),
+	SPH_C64(0xD6B9B96718A167B9), SPH_C64(0x3513135F8B985F13),
+	SPH_C64(0x742C2C9C517D9C2C), SPH_C64(0x68D3D3B805D6B8D3),
+	SPH_C64(0x34E7E75C8C6B5CE7), SPH_C64(0xB26E6ECB3957CB6E),
+	SPH_C64(0x51C4C4F3AA6EF3C4), SPH_C64(0x0503030F1B180F03),
+	SPH_C64(0xFA565613DC8A1356), SPH_C64(0xCC4444495E1A4944),
+	SPH_C64(0x817F7F9EA0DF9E7F), SPH_C64(0xE6A9A937882137A9),
+	SPH_C64(0x7E2A2A82674D822A), SPH_C64(0xD0BBBB6D0AB16DBB),
+	SPH_C64(0x5EC1C1E28746E2C1), SPH_C64(0xF5535302F1A20253),
+	SPH_C64(0x79DCDC8B72AE8BDC), SPH_C64(0x1D0B0B275358270B),
+	SPH_C64(0xBA9D9DD3019CD39D), SPH_C64(0xB46C6CC12B47C16C),
+	SPH_C64(0x533131F5A495F531), SPH_C64(0x9C7474B9F387B974),
+	SPH_C64(0x07F6F60915E309F6), SPH_C64(0xCA4646434C0A4346),
+	SPH_C64(0xE9ACAC26A50926AC), SPH_C64(0x86898997B53C9789),
+	SPH_C64(0x3C141444B4A04414), SPH_C64(0x3EE1E142BA5B42E1),
+	SPH_C64(0x3A16164EA6B04E16), SPH_C64(0x4E3A3AD2F7CDD23A),
+	SPH_C64(0xBB6969D0066FD069), SPH_C64(0x1B09092D41482D09),
+	SPH_C64(0x907070ADD7A7AD70), SPH_C64(0xC7B6B6546FD954B6),
+	SPH_C64(0x6DD0D0B71ECEB7D0), SPH_C64(0x2AEDED7ED63B7EED),
+	SPH_C64(0x49CCCCDBE22EDBCC), SPH_C64(0xC6424257682A5742),
+	SPH_C64(0xB59898C22CB4C298), SPH_C64(0xF1A4A40EED490EA4),
+	SPH_C64(0x78282888755D8828), SPH_C64(0xE45C5C3186DA315C),
+	SPH_C64(0x15F8F83F6B933FF8), SPH_C64(0x978686A4C244A486)
+};
+
+static const sph_u64 old1_T6[256] = {
+	SPH_C64(0x181878D8C0781828), SPH_C64(0x2323AF2605AF2365),
+	SPH_C64(0xC6C6F9B87EF9C657), SPH_C64(0xE8E86FFB136FE825),
+	SPH_C64(0x8787A1CB4CA18794), SPH_C64(0xB8B86211A962B8D5),
+	SPH_C64(0x0101050908050103), SPH_C64(0x4F4F6E0D426E4FD1),
+	SPH_C64(0x3636EE9BADEE365A), SPH_C64(0xA6A604FF5904A6F7),
+	SPH_C64(0xD2D2BD0CDEBDD26B), SPH_C64(0xF5F5060EFB06F502),
+	SPH_C64(0x79798096EF80798B), SPH_C64(0x6F6FCE305FCE6FB1),
+	SPH_C64(0x9191EF6DFCEF91AE), SPH_C64(0x525207F8AA0752F6),
+	SPH_C64(0x6060FD4727FD60A0), SPH_C64(0xBCBC76358976BCD9),
+	SPH_C64(0x9B9BCD37ACCD9BB0), SPH_C64(0x8E8E8C8A048C8E8F),
+	SPH_C64(0xA3A315D27115A3F8), SPH_C64(0x0C0C3C6C603C0C14),
+	SPH_C64(0x7B7B8A84FF8A7B8D), SPH_C64(0x3535E180B5E1355F),
+	SPH_C64(0x1D1D69F5E8691D27), SPH_C64(0xE0E047B35347E03D),
+	SPH_C64(0xD7D7AC21F6ACD764), SPH_C64(0xC2C2ED9C5EEDC25B),
+	SPH_C64(0x2E2E96436D962E72), SPH_C64(0x4B4B7A29627A4BDD),
+	SPH_C64(0xFEFE215DA321FE1F), SPH_C64(0x575716D5821657F9),
+	SPH_C64(0x151541BDA841153F), SPH_C64(0x7777B6E89FB67799),
+	SPH_C64(0x3737EB92A5EB3759), SPH_C64(0xE5E5569E7B56E532),
+	SPH_C64(0x9F9FD9138CD99FBC), SPH_C64(0xF0F01723D317F00D),
+	SPH_C64(0x4A4A7F206A7F4ADE), SPH_C64(0xDADA95449E95DA73),
+	SPH_C64(0x585825A2FA2558E8), SPH_C64(0xC9C9CACF06CAC946),
+	SPH_C64(0x29298D7C558D297B), SPH_C64(0x0A0A225A50220A1E),
+	SPH_C64(0xB1B14F50E14FB1CE), SPH_C64(0xA0A01AC9691AA0FD),
+	SPH_C64(0x6B6BDA147FDA6BBD), SPH_C64(0x8585ABD95CAB8592),
+	SPH_C64(0xBDBD733C8173BDDA), SPH_C64(0x5D5D348FD2345DE7),
+	SPH_C64(0x1010509080501030), SPH_C64(0xF4F40307F303F401),
+	SPH_C64(0xCBCBC0DD16C0CB40), SPH_C64(0x3E3EC6D3EDC63E42),
+	SPH_C64(0x0505112D2811050F), SPH_C64(0x6767E6781FE667A9),
+	SPH_C64(0xE4E453977353E431), SPH_C64(0x2727BB0225BB2769),
+	SPH_C64(0x41415873325841C3), SPH_C64(0x8B8B9DA72C9D8B80),
+	SPH_C64(0xA7A701F65101A7F4), SPH_C64(0x7D7D94B2CF947D87),
+	SPH_C64(0x9595FB49DCFB95A2), SPH_C64(0xD8D89F568E9FD875),
+	SPH_C64(0xFBFB30708B30FB10), SPH_C64(0xEEEE71CD2371EE2F),
+	SPH_C64(0x7C7C91BBC7917C84), SPH_C64(0x6666E37117E366AA),
+	SPH_C64(0xDDDD8E7BA68EDD7A), SPH_C64(0x17174BAFB84B1739),
+	SPH_C64(0x47474645024647C9), SPH_C64(0x9E9EDC1A84DC9EBF),
+	SPH_C64(0xCACAC5D41EC5CA43), SPH_C64(0x2D2D995875992D77),
+	SPH_C64(0xBFBF792E9179BFDC), SPH_C64(0x07071B3F381B0709),
+	SPH_C64(0xADAD23AC0123ADEA), SPH_C64(0x5A5A2FB0EA2F5AEE),
+	SPH_C64(0x8383B5EF6CB58398), SPH_C64(0x3333FFB685FF3355),
+	SPH_C64(0x6363F25C3FF263A5), SPH_C64(0x02020A12100A0206),
+	SPH_C64(0xAAAA38933938AAE3), SPH_C64(0x7171A8DEAFA87193),
+	SPH_C64(0xC8C8CFC60ECFC845), SPH_C64(0x19197DD1C87D192B),
+	SPH_C64(0x4949703B727049DB), SPH_C64(0xD9D99A5F869AD976),
+	SPH_C64(0xF2F21D31C31DF20B), SPH_C64(0xE3E348A84B48E338),
+	SPH_C64(0x5B5B2AB9E22A5BED), SPH_C64(0x888892BC34928885),
+	SPH_C64(0x9A9AC83EA4C89AB3), SPH_C64(0x2626BE0B2DBE266A),
+	SPH_C64(0x3232FABF8DFA3256), SPH_C64(0xB0B04A59E94AB0CD),
+	SPH_C64(0xE9E96AF21B6AE926), SPH_C64(0x0F0F337778330F11),
+	SPH_C64(0xD5D5A633E6A6D562), SPH_C64(0x8080BAF474BA809D),
+	SPH_C64(0xBEBE7C27997CBEDF), SPH_C64(0xCDCDDEEB26DECD4A),
+	SPH_C64(0x3434E489BDE4345C), SPH_C64(0x484875327A7548D8),
+	SPH_C64(0xFFFF2454AB24FF1C), SPH_C64(0x7A7A8F8DF78F7A8E),
+	SPH_C64(0x9090EA64F4EA90AD), SPH_C64(0x5F5F3E9DC23E5FE1),
+	SPH_C64(0x2020A03D1DA02060), SPH_C64(0x6868D50F67D568B8),
+	SPH_C64(0x1A1A72CAD0721A2E), SPH_C64(0xAEAE2CB7192CAEEF),
+	SPH_C64(0xB4B45E7DC95EB4C1), SPH_C64(0x545419CE9A1954FC),
+	SPH_C64(0x9393E57FECE593A8), SPH_C64(0x2222AA2F0DAA2266),
+	SPH_C64(0x6464E96307E964AC), SPH_C64(0xF1F1122ADB12F10E),
+	SPH_C64(0x7373A2CCBFA27395), SPH_C64(0x12125A82905A1236),
+	SPH_C64(0x40405D7A3A5D40C0), SPH_C64(0x0808284840280818),
+	SPH_C64(0xC3C3E89556E8C358), SPH_C64(0xECEC7BDF337BEC29),
+	SPH_C64(0xDBDB904D9690DB70), SPH_C64(0xA1A11FC0611FA1FE),
+	SPH_C64(0x8D8D83911C838D8A), SPH_C64(0x3D3DC9C8F5C93D47),
+	SPH_C64(0x9797F15BCCF197A4), SPH_C64(0x0000000000000000),
+	SPH_C64(0xCFCFD4F936D4CF4C), SPH_C64(0x2B2B876E45872B7D),
+	SPH_C64(0x7676B3E197B3769A), SPH_C64(0x8282B0E664B0829B),
+	SPH_C64(0xD6D6A928FEA9D667), SPH_C64(0x1B1B77C3D8771B2D),
+	SPH_C64(0xB5B55B74C15BB5C2), SPH_C64(0xAFAF29BE1129AFEC),
+	SPH_C64(0x6A6ADF1D77DF6ABE), SPH_C64(0x50500DEABA0D50F0),
+	SPH_C64(0x45454C57124C45CF), SPH_C64(0xF3F31838CB18F308),
+	SPH_C64(0x3030F0AD9DF03050), SPH_C64(0xEFEF74C42B74EF2C),
+	SPH_C64(0x3F3FC3DAE5C33F41), SPH_C64(0x55551CC7921C55FF),
+	SPH_C64(0xA2A210DB7910A2FB), SPH_C64(0xEAEA65E90365EA23),
+	SPH_C64(0x6565EC6A0FEC65AF), SPH_C64(0xBABA6803B968BAD3),
+	SPH_C64(0x2F2F934A65932F71), SPH_C64(0xC0C0E78E4EE7C05D),
+	SPH_C64(0xDEDE8160BE81DE7F), SPH_C64(0x1C1C6CFCE06C1C24),
+	SPH_C64(0xFDFD2E46BB2EFD1A), SPH_C64(0x4D4D641F52644DD7),
+	SPH_C64(0x9292E076E4E092AB), SPH_C64(0x7575BCFA8FBC759F),
+	SPH_C64(0x06061E36301E060A), SPH_C64(0x8A8A98AE24988A83),
+	SPH_C64(0xB2B2404BF940B2CB), SPH_C64(0xE6E659856359E637),
+	SPH_C64(0x0E0E367E70360E12), SPH_C64(0x1F1F63E7F8631F21),
+	SPH_C64(0x6262F75537F762A6), SPH_C64(0xD4D4A33AEEA3D461),
+	SPH_C64(0xA8A832812932A8E5), SPH_C64(0x9696F452C4F496A7),
+	SPH_C64(0xF9F93A629B3AF916), SPH_C64(0xC5C5F6A366F6C552),
+	SPH_C64(0x2525B11035B1256F), SPH_C64(0x595920ABF22059EB),
+	SPH_C64(0x8484AED054AE8491), SPH_C64(0x7272A7C5B7A77296),
+	SPH_C64(0x3939DDECD5DD394B), SPH_C64(0x4C4C61165A614CD4),
+	SPH_C64(0x5E5E3B94CA3B5EE2), SPH_C64(0x7878859FE7857888),
+	SPH_C64(0x3838D8E5DDD83848), SPH_C64(0x8C8C869814868C89),
+	SPH_C64(0xD1D1B217C6B2D16E), SPH_C64(0xA5A50BE4410BA5F2),
+	SPH_C64(0xE2E24DA1434DE23B), SPH_C64(0x6161F84E2FF861A3),
+	SPH_C64(0xB3B34542F145B3C8), SPH_C64(0x2121A53415A52163),
+	SPH_C64(0x9C9CD60894D69CB9), SPH_C64(0x1E1E66EEF0661E22),
+	SPH_C64(0x43435261225243C5), SPH_C64(0xC7C7FCB176FCC754),
+	SPH_C64(0xFCFC2B4FB32BFC19), SPH_C64(0x040414242014040C),
+	SPH_C64(0x515108E3B20851F3), SPH_C64(0x9999C725BCC799B6),
+	SPH_C64(0x6D6DC4224FC46DB7), SPH_C64(0x0D0D396568390D17),
+	SPH_C64(0xFAFA35798335FA13), SPH_C64(0xDFDF8469B684DF7C),
+	SPH_C64(0x7E7E9BA9D79B7E82), SPH_C64(0x2424B4193DB4246C),
+	SPH_C64(0x3B3BD7FEC5D73B4D), SPH_C64(0xABAB3D9A313DABE0),
+	SPH_C64(0xCECED1F03ED1CE4F), SPH_C64(0x1111559988551133),
+	SPH_C64(0x8F8F89830C898F8C), SPH_C64(0x4E4E6B044A6B4ED2),
+	SPH_C64(0xB7B75166D151B7C4), SPH_C64(0xEBEB60E00B60EB20),
+	SPH_C64(0x3C3CCCC1FDCC3C44), SPH_C64(0x8181BFFD7CBF819E),
+	SPH_C64(0x9494FE40D4FE94A1), SPH_C64(0xF7F70C1CEB0CF704),
+	SPH_C64(0xB9B96718A167B9D6), SPH_C64(0x13135F8B985F1335),
+	SPH_C64(0x2C2C9C517D9C2C74), SPH_C64(0xD3D3B805D6B8D368),
+	SPH_C64(0xE7E75C8C6B5CE734), SPH_C64(0x6E6ECB3957CB6EB2),
+	SPH_C64(0xC4C4F3AA6EF3C451), SPH_C64(0x03030F1B180F0305),
+	SPH_C64(0x565613DC8A1356FA), SPH_C64(0x4444495E1A4944CC),
+	SPH_C64(0x7F7F9EA0DF9E7F81), SPH_C64(0xA9A937882137A9E6),
+	SPH_C64(0x2A2A82674D822A7E), SPH_C64(0xBBBB6D0AB16DBBD0),
+	SPH_C64(0xC1C1E28746E2C15E), SPH_C64(0x535302F1A20253F5),
+	SPH_C64(0xDCDC8B72AE8BDC79), SPH_C64(0x0B0B275358270B1D),
+	SPH_C64(0x9D9DD3019CD39DBA), SPH_C64(0x6C6CC12B47C16CB4),
+	SPH_C64(0x3131F5A495F53153), SPH_C64(0x7474B9F387B9749C),
+	SPH_C64(0xF6F60915E309F607), SPH_C64(0x4646434C0A4346CA),
+	SPH_C64(0xACAC26A50926ACE9), SPH_C64(0x898997B53C978986),
+	SPH_C64(0x141444B4A044143C), SPH_C64(0xE1E142BA5B42E13E),
+	SPH_C64(0x16164EA6B04E163A), SPH_C64(0x3A3AD2F7CDD23A4E),
+	SPH_C64(0x6969D0066FD069BB), SPH_C64(0x09092D41482D091B),
+	SPH_C64(0x7070ADD7A7AD7090), SPH_C64(0xB6B6546FD954B6C7),
+	SPH_C64(0xD0D0B71ECEB7D06D), SPH_C64(0xEDED7ED63B7EED2A),
+	SPH_C64(0xCCCCDBE22EDBCC49), SPH_C64(0x424257682A5742C6),
+	SPH_C64(0x9898C22CB4C298B5), SPH_C64(0xA4A40EED490EA4F1),
+	SPH_C64(0x282888755D882878), SPH_C64(0x5C5C3186DA315CE4),
+	SPH_C64(0xF8F83F6B933FF815), SPH_C64(0x8686A4C244A48697)
+};
+
+static const sph_u64 old1_T7[256] = {
+	SPH_C64(0x1878D8C078182818), SPH_C64(0x23AF2605AF236523),
+	SPH_C64(0xC6F9B87EF9C657C6), SPH_C64(0xE86FFB136FE825E8),
+	SPH_C64(0x87A1CB4CA1879487), SPH_C64(0xB86211A962B8D5B8),
+	SPH_C64(0x0105090805010301), SPH_C64(0x4F6E0D426E4FD14F),
+	SPH_C64(0x36EE9BADEE365A36), SPH_C64(0xA604FF5904A6F7A6),
+	SPH_C64(0xD2BD0CDEBDD26BD2), SPH_C64(0xF5060EFB06F502F5),
+	SPH_C64(0x798096EF80798B79), SPH_C64(0x6FCE305FCE6FB16F),
+	SPH_C64(0x91EF6DFCEF91AE91), SPH_C64(0x5207F8AA0752F652),
+	SPH_C64(0x60FD4727FD60A060), SPH_C64(0xBC76358976BCD9BC),
+	SPH_C64(0x9BCD37ACCD9BB09B), SPH_C64(0x8E8C8A048C8E8F8E),
+	SPH_C64(0xA315D27115A3F8A3), SPH_C64(0x0C3C6C603C0C140C),
+	SPH_C64(0x7B8A84FF8A7B8D7B), SPH_C64(0x35E180B5E1355F35),
+	SPH_C64(0x1D69F5E8691D271D), SPH_C64(0xE047B35347E03DE0),
+	SPH_C64(0xD7AC21F6ACD764D7), SPH_C64(0xC2ED9C5EEDC25BC2),
+	SPH_C64(0x2E96436D962E722E), SPH_C64(0x4B7A29627A4BDD4B),
+	SPH_C64(0xFE215DA321FE1FFE), SPH_C64(0x5716D5821657F957),
+	SPH_C64(0x1541BDA841153F15), SPH_C64(0x77B6E89FB6779977),
+	SPH_C64(0x37EB92A5EB375937), SPH_C64(0xE5569E7B56E532E5),
+	SPH_C64(0x9FD9138CD99FBC9F), SPH_C64(0xF01723D317F00DF0),
+	SPH_C64(0x4A7F206A7F4ADE4A), SPH_C64(0xDA95449E95DA73DA),
+	SPH_C64(0x5825A2FA2558E858), SPH_C64(0xC9CACF06CAC946C9),
+	SPH_C64(0x298D7C558D297B29), SPH_C64(0x0A225A50220A1E0A),
+	SPH_C64(0xB14F50E14FB1CEB1), SPH_C64(0xA01AC9691AA0FDA0),
+	SPH_C64(0x6BDA147FDA6BBD6B), SPH_C64(0x85ABD95CAB859285),
+	SPH_C64(0xBD733C8173BDDABD), SPH_C64(0x5D348FD2345DE75D),
+	SPH_C64(0x1050908050103010), SPH_C64(0xF40307F303F401F4),
+	SPH_C64(0xCBC0DD16C0CB40CB), SPH_C64(0x3EC6D3EDC63E423E),
+	SPH_C64(0x05112D2811050F05), SPH_C64(0x67E6781FE667A967),
+	SPH_C64(0xE453977353E431E4), SPH_C64(0x27BB0225BB276927),
+	SPH_C64(0x415873325841C341), SPH_C64(0x8B9DA72C9D8B808B),
+	SPH_C64(0xA701F65101A7F4A7), SPH_C64(0x7D94B2CF947D877D),
+	SPH_C64(0x95FB49DCFB95A295), SPH_C64(0xD89F568E9FD875D8),
+	SPH_C64(0xFB30708B30FB10FB), SPH_C64(0xEE71CD2371EE2FEE),
+	SPH_C64(0x7C91BBC7917C847C), SPH_C64(0x66E37117E366AA66),
+	SPH_C64(0xDD8E7BA68EDD7ADD), SPH_C64(0x174BAFB84B173917),
+	SPH_C64(0x474645024647C947), SPH_C64(0x9EDC1A84DC9EBF9E),
+	SPH_C64(0xCAC5D41EC5CA43CA), SPH_C64(0x2D995875992D772D),
+	SPH_C64(0xBF792E9179BFDCBF), SPH_C64(0x071B3F381B070907),
+	SPH_C64(0xAD23AC0123ADEAAD), SPH_C64(0x5A2FB0EA2F5AEE5A),
+	SPH_C64(0x83B5EF6CB5839883), SPH_C64(0x33FFB685FF335533),
+	SPH_C64(0x63F25C3FF263A563), SPH_C64(0x020A12100A020602),
+	SPH_C64(0xAA38933938AAE3AA), SPH_C64(0x71A8DEAFA8719371),
+	SPH_C64(0xC8CFC60ECFC845C8), SPH_C64(0x197DD1C87D192B19),
+	SPH_C64(0x49703B727049DB49), SPH_C64(0xD99A5F869AD976D9),
+	SPH_C64(0xF21D31C31DF20BF2), SPH_C64(0xE348A84B48E338E3),
+	SPH_C64(0x5B2AB9E22A5BED5B), SPH_C64(0x8892BC3492888588),
+	SPH_C64(0x9AC83EA4C89AB39A), SPH_C64(0x26BE0B2DBE266A26),
+	SPH_C64(0x32FABF8DFA325632), SPH_C64(0xB04A59E94AB0CDB0),
+	SPH_C64(0xE96AF21B6AE926E9), SPH_C64(0x0F337778330F110F),
+	SPH_C64(0xD5A633E6A6D562D5), SPH_C64(0x80BAF474BA809D80),
+	SPH_C64(0xBE7C27997CBEDFBE), SPH_C64(0xCDDEEB26DECD4ACD),
+	SPH_C64(0x34E489BDE4345C34), SPH_C64(0x4875327A7548D848),
+	SPH_C64(0xFF2454AB24FF1CFF), SPH_C64(0x7A8F8DF78F7A8E7A),
+	SPH_C64(0x90EA64F4EA90AD90), SPH_C64(0x5F3E9DC23E5FE15F),
+	SPH_C64(0x20A03D1DA0206020), SPH_C64(0x68D50F67D568B868),
+	SPH_C64(0x1A72CAD0721A2E1A), SPH_C64(0xAE2CB7192CAEEFAE),
+	SPH_C64(0xB45E7DC95EB4C1B4), SPH_C64(0x5419CE9A1954FC54),
+	SPH_C64(0x93E57FECE593A893), SPH_C64(0x22AA2F0DAA226622),
+	SPH_C64(0x64E96307E964AC64), SPH_C64(0xF1122ADB12F10EF1),
+	SPH_C64(0x73A2CCBFA2739573), SPH_C64(0x125A82905A123612),
+	SPH_C64(0x405D7A3A5D40C040), SPH_C64(0x0828484028081808),
+	SPH_C64(0xC3E89556E8C358C3), SPH_C64(0xEC7BDF337BEC29EC),
+	SPH_C64(0xDB904D9690DB70DB), SPH_C64(0xA11FC0611FA1FEA1),
+	SPH_C64(0x8D83911C838D8A8D), SPH_C64(0x3DC9C8F5C93D473D),
+	SPH_C64(0x97F15BCCF197A497), SPH_C64(0x0000000000000000),
+	SPH_C64(0xCFD4F936D4CF4CCF), SPH_C64(0x2B876E45872B7D2B),
+	SPH_C64(0x76B3E197B3769A76), SPH_C64(0x82B0E664B0829B82),
+	SPH_C64(0xD6A928FEA9D667D6), SPH_C64(0x1B77C3D8771B2D1B),
+	SPH_C64(0xB55B74C15BB5C2B5), SPH_C64(0xAF29BE1129AFECAF),
+	SPH_C64(0x6ADF1D77DF6ABE6A), SPH_C64(0x500DEABA0D50F050),
+	SPH_C64(0x454C57124C45CF45), SPH_C64(0xF31838CB18F308F3),
+	SPH_C64(0x30F0AD9DF0305030), SPH_C64(0xEF74C42B74EF2CEF),
+	SPH_C64(0x3FC3DAE5C33F413F), SPH_C64(0x551CC7921C55FF55),
+	SPH_C64(0xA210DB7910A2FBA2), SPH_C64(0xEA65E90365EA23EA),
+	SPH_C64(0x65EC6A0FEC65AF65), SPH_C64(0xBA6803B968BAD3BA),
+	SPH_C64(0x2F934A65932F712F), SPH_C64(0xC0E78E4EE7C05DC0),
+	SPH_C64(0xDE8160BE81DE7FDE), SPH_C64(0x1C6CFCE06C1C241C),
+	SPH_C64(0xFD2E46BB2EFD1AFD), SPH_C64(0x4D641F52644DD74D),
+	SPH_C64(0x92E076E4E092AB92), SPH_C64(0x75BCFA8FBC759F75),
+	SPH_C64(0x061E36301E060A06), SPH_C64(0x8A98AE24988A838A),
+	SPH_C64(0xB2404BF940B2CBB2), SPH_C64(0xE659856359E637E6),
+	SPH_C64(0x0E367E70360E120E), SPH_C64(0x1F63E7F8631F211F),
+	SPH_C64(0x62F75537F762A662), SPH_C64(0xD4A33AEEA3D461D4),
+	SPH_C64(0xA832812932A8E5A8), SPH_C64(0x96F452C4F496A796),
+	SPH_C64(0xF93A629B3AF916F9), SPH_C64(0xC5F6A366F6C552C5),
+	SPH_C64(0x25B11035B1256F25), SPH_C64(0x5920ABF22059EB59),
+	SPH_C64(0x84AED054AE849184), SPH_C64(0x72A7C5B7A7729672),
+	SPH_C64(0x39DDECD5DD394B39), SPH_C64(0x4C61165A614CD44C),
+	SPH_C64(0x5E3B94CA3B5EE25E), SPH_C64(0x78859FE785788878),
+	SPH_C64(0x38D8E5DDD8384838), SPH_C64(0x8C869814868C898C),
+	SPH_C64(0xD1B217C6B2D16ED1), SPH_C64(0xA50BE4410BA5F2A5),
+	SPH_C64(0xE24DA1434DE23BE2), SPH_C64(0x61F84E2FF861A361),
+	SPH_C64(0xB34542F145B3C8B3), SPH_C64(0x21A53415A5216321),
+	SPH_C64(0x9CD60894D69CB99C), SPH_C64(0x1E66EEF0661E221E),
+	SPH_C64(0x435261225243C543), SPH_C64(0xC7FCB176FCC754C7),
+	SPH_C64(0xFC2B4FB32BFC19FC), SPH_C64(0x0414242014040C04),
+	SPH_C64(0x5108E3B20851F351), SPH_C64(0x99C725BCC799B699),
+	SPH_C64(0x6DC4224FC46DB76D), SPH_C64(0x0D396568390D170D),
+	SPH_C64(0xFA35798335FA13FA), SPH_C64(0xDF8469B684DF7CDF),
+	SPH_C64(0x7E9BA9D79B7E827E), SPH_C64(0x24B4193DB4246C24),
+	SPH_C64(0x3BD7FEC5D73B4D3B), SPH_C64(0xAB3D9A313DABE0AB),
+	SPH_C64(0xCED1F03ED1CE4FCE), SPH_C64(0x1155998855113311),
+	SPH_C64(0x8F89830C898F8C8F), SPH_C64(0x4E6B044A6B4ED24E),
+	SPH_C64(0xB75166D151B7C4B7), SPH_C64(0xEB60E00B60EB20EB),
+	SPH_C64(0x3CCCC1FDCC3C443C), SPH_C64(0x81BFFD7CBF819E81),
+	SPH_C64(0x94FE40D4FE94A194), SPH_C64(0xF70C1CEB0CF704F7),
+	SPH_C64(0xB96718A167B9D6B9), SPH_C64(0x135F8B985F133513),
+	SPH_C64(0x2C9C517D9C2C742C), SPH_C64(0xD3B805D6B8D368D3),
+	SPH_C64(0xE75C8C6B5CE734E7), SPH_C64(0x6ECB3957CB6EB26E),
+	SPH_C64(0xC4F3AA6EF3C451C4), SPH_C64(0x030F1B180F030503),
+	SPH_C64(0x5613DC8A1356FA56), SPH_C64(0x44495E1A4944CC44),
+	SPH_C64(0x7F9EA0DF9E7F817F), SPH_C64(0xA937882137A9E6A9),
+	SPH_C64(0x2A82674D822A7E2A), SPH_C64(0xBB6D0AB16DBBD0BB),
+	SPH_C64(0xC1E28746E2C15EC1), SPH_C64(0x5302F1A20253F553),
+	SPH_C64(0xDC8B72AE8BDC79DC), SPH_C64(0x0B275358270B1D0B),
+	SPH_C64(0x9DD3019CD39DBA9D), SPH_C64(0x6CC12B47C16CB46C),
+	SPH_C64(0x31F5A495F5315331), SPH_C64(0x74B9F387B9749C74),
+	SPH_C64(0xF60915E309F607F6), SPH_C64(0x46434C0A4346CA46),
+	SPH_C64(0xAC26A50926ACE9AC), SPH_C64(0x8997B53C97898689),
+	SPH_C64(0x1444B4A044143C14), SPH_C64(0xE142BA5B42E13EE1),
+	SPH_C64(0x164EA6B04E163A16), SPH_C64(0x3AD2F7CDD23A4E3A),
+	SPH_C64(0x69D0066FD069BB69), SPH_C64(0x092D41482D091B09),
+	SPH_C64(0x70ADD7A7AD709070), SPH_C64(0xB6546FD954B6C7B6),
+	SPH_C64(0xD0B71ECEB7D06DD0), SPH_C64(0xED7ED63B7EED2AED),
+	SPH_C64(0xCCDBE22EDBCC49CC), SPH_C64(0x4257682A5742C642),
+	SPH_C64(0x98C22CB4C298B598), SPH_C64(0xA40EED490EA4F1A4),
+	SPH_C64(0x2888755D88287828), SPH_C64(0x5C3186DA315CE45C),
+	SPH_C64(0xF83F6B933FF815F8), SPH_C64(0x86A4C244A4869786)
+};
+
+#endif
+
+static const sph_u64 old1_RC[10] = { /* one constant per round: entry r is XORed into key-schedule word 0 in round r of whirlpool1_round */
+	SPH_C64(0x4F01B887E8C62318),
+	SPH_C64(0x52916F79F5D2A636),
+	SPH_C64(0x357B0CA38E9BBC60),
+	SPH_C64(0x57FE4B2EC2D7E01D),
+	SPH_C64(0xDA4AF09FE5377715),
+	SPH_C64(0x856BA0B10A29C958),
+	SPH_C64(0x67053ECBF4105DBD),
+	SPH_C64(0xD8957DA78B4127E4),
+	SPH_C64(0x9E4717DD667CEEFB),
+	SPH_C64(0x33835AAD07BF2DCA)
+};
+
+/* ====================================================================== */
+
+#define DECL8(z)   sph_u64 z ## 0, z ## 1, z ## 2, z ## 3, /* declares eight sph_u64 variables z0 .. z7 (no trailing semicolon) */ \
+                   z ## 4, z ## 5, z ## 6, z ## 7
+
+#if SPH_LITTLE_FAST /* this branch decodes the input from src a second time instead of keeping a saved copy */
+#define READ_DATA_W(x)   do { /* n<x> = sph_dec64le_aligned() of the 8 bytes at src + 8*x */ \
+		n ## x = sph_dec64le_aligned( \
+			(const unsigned char *)src + 8 * (x)); \
+	} while (0)
+#define UPDATE_STATE_W(x)   do { /* state[x] ^= current n<x> ^ input word x, decoded again from src */ \
+		state[x] ^= n ## x ^ sph_dec64le_aligned( \
+			(const unsigned char *)src + 8 * (x)); \
+	} while (0)
+#define LVARS   DECL8(n); DECL8(h); /* locals: data words n0..n7 and key words h0..h7 */
+#else /* this branch keeps each decoded input word in sn<x> for the final state update */
+#define READ_DATA_W(x)   do { /* decode input word x once, into both n<x> and sn<x> */ \
+		sn ## x = n ## x = sph_dec64le_aligned( \
+			(const unsigned char *)src + 8 * (x)); \
+	} while (0)
+#define UPDATE_STATE_W(x)   do { /* state[x] ^= current n<x> ^ saved input word sn<x> */ \
+		state[x] ^= n ## x ^ sn ## x; \
+	} while (0)
+#define LVARS   DECL8(n); DECL8(sn); DECL8(h); /* locals: n0..n7, saved input sn0..sn7, key words h0..h7 */
+#endif /* SPH_LITTLE_FAST */
+
+#define READ_STATE_W(x)   do { h ## x = state[x]; } while (0) /* copy state word x into local h<x> */
+
+#define MUL8(FUN)   do { /* apply the one-word macro FUN to indices 0 .. 7, in ascending order */ \
+		FUN(0); \
+		FUN(1); \
+		FUN(2); \
+		FUN(3); \
+		FUN(4); \
+		FUN(5); \
+		FUN(6); \
+		FUN(7); \
+	} while (0)
+
+/*
+ * First operation: XOR the input data word n<x> with h<x>, the first round key.
+ */
+#define ROUND0_W(x)   do { /* n<x> is modified in place; h<x> is left unchanged */ \
+		n ## x ^= h ## x; \
+	} while (0)
+
+#define READ_DATA      MUL8(READ_DATA_W) /* READ_DATA_W for all eight words */
+#define READ_STATE     MUL8(READ_STATE_W) /* READ_STATE_W for all eight words */
+#define ROUND0         MUL8(ROUND0_W) /* ROUND0_W for all eight words */
+#define UPDATE_STATE   MUL8(UPDATE_STATE_W) /* UPDATE_STATE_W for all eight words */
+
+#define BYTE(x, n)     ((unsigned)((x) >> (8 * (n))) & 0xFF) /* byte n of x, where n = 0 is the least significant byte */
+
+#if SPH_SMALL_FOOTPRINT_WHIRLPOOL /* single-table variant: only table ## 0 is referenced */
+
+static SPH_INLINE sph_u64
+table_skew(sph_u64 val, int num) /* returns SPH_ROTL64(val, 8 * num); presumably a left rotation by num bytes */
+{
+	return SPH_ROTL64(val, 8 * num);
+}
+
+#define ROUND_ELT(table, in, i0, i1, i2, i3, i4, i5, i6, i7) /* XOR of eight table ## 0 lookups; lookup k is indexed by byte k of in<ik> and skewed by k */ \
+	(table ## 0[BYTE(in ## i0, 0)] \
+	^ table_skew(table ## 0[BYTE(in ## i1, 1)], 1) \
+	^ table_skew(table ## 0[BYTE(in ## i2, 2)], 2) \
+	^ table_skew(table ## 0[BYTE(in ## i3, 3)], 3) \
+	^ table_skew(table ## 0[BYTE(in ## i4, 4)], 4) \
+	^ table_skew(table ## 0[BYTE(in ## i5, 5)], 5) \
+	^ table_skew(table ## 0[BYTE(in ## i6, 6)], 6) \
+	^ table_skew(table ## 0[BYTE(in ## i7, 7)], 7))
+#else /* eight-table variant: lookup k reads table ## k directly, no skew call */
+#define ROUND_ELT(table, in, i0, i1, i2, i3, i4, i5, i6, i7) /* XOR of table ## k[byte k of in<ik>] for k = 0 .. 7 */ \
+	(table ## 0[BYTE(in ## i0, 0)] \
+	^ table ## 1[BYTE(in ## i1, 1)] \
+	^ table ## 2[BYTE(in ## i2, 2)] \
+	^ table ## 3[BYTE(in ## i3, 3)] \
+	^ table ## 4[BYTE(in ## i4, 4)] \
+	^ table ## 5[BYTE(in ## i5, 5)] \
+	^ table ## 6[BYTE(in ## i6, 6)] \
+	^ table ## 7[BYTE(in ## i7, 7)])
+#endif /* SPH_SMALL_FOOTPRINT_WHIRLPOOL */
+
+#define ROUND(table, in, out, c0, c1, c2, c3, c4, c5, c6, c7)   do { /* out<j> = ROUND_ELT taking byte k from in<(j - k) mod 8>, then XORed with c<j> */ \
+		out ## 0 = ROUND_ELT(table, in, 0, 7, 6, 5, 4, 3, 2, 1) ^ c0; \
+		out ## 1 = ROUND_ELT(table, in, 1, 0, 7, 6, 5, 4, 3, 2) ^ c1; \
+		out ## 2 = ROUND_ELT(table, in, 2, 1, 0, 7, 6, 5, 4, 3) ^ c2; \
+		out ## 3 = ROUND_ELT(table, in, 3, 2, 1, 0, 7, 6, 5, 4) ^ c3; \
+		out ## 4 = ROUND_ELT(table, in, 4, 3, 2, 1, 0, 7, 6, 5) ^ c4; \
+		out ## 5 = ROUND_ELT(table, in, 5, 4, 3, 2, 1, 0, 7, 6) ^ c5; \
+		out ## 6 = ROUND_ELT(table, in, 6, 5, 4, 3, 2, 1, 0, 7) ^ c6; \
+		out ## 7 = ROUND_ELT(table, in, 7, 6, 5, 4, 3, 2, 1, 0) ^ c7; \
+	} while (0)
+
+#define ROUND_KSCHED(table, in, out, c) /* ROUND with c XORed into output word 0 and 0 XORed into words 1 .. 7 */ \
+	ROUND(table, in, out, c, 0, 0, 0, 0, 0, 0, 0)
+
+#define ROUND_WENC(table, in, key, out) /* ROUND with key<j> XORed into output word j, for j = 0 .. 7 */ \
+	ROUND(table, in, out, key ## 0, key ## 1, key ## 2, \
+		key ## 3, key ## 4, key ## 5, key ## 6, key ## 7)
+
+#define TRANSFER(dst, src)   do { /* copy the eight variables src0 .. src7 into dst0 .. dst7 */ \
+		dst ## 0 = src ## 0; \
+		dst ## 1 = src ## 1; \
+		dst ## 2 = src ## 2; \
+		dst ## 3 = src ## 3; \
+		dst ## 4 = src ## 4; \
+		dst ## 5 = src ## 5; \
+		dst ## 6 = src ## 6; \
+		dst ## 7 = src ## 7; \
+	} while (0)
+
+/* see sph_whirlpool.h -- zeroes the state words and the count field(s) of the context */
+void
+sph_whirlpool_init(void *cc)
+{
+	sph_whirlpool_context *sc;
+
+	sc = cc; /* cc is treated as a sph_whirlpool_context */
+	/*
+	 * We want to set all eight 64-bit words to 0. A "memset()"
+	 * is not, theoretically, fully standard, but in practice it
+	 * will work everywhere.
+	 */
+	memset(sc->state, 0, sizeof sc->state);
+#if SPH_64
+	sc->count = 0; /* single count field in this configuration */
+#else
+	sc->count_high = sc->count_low = 0; /* count kept as two fields in this configuration */
+#endif
+}
+
+#define ROUND_FUN(name, type) /* defines static void name_round(src, state), built on the type_T* tables and type_RC constants */ \
+static void \
+name ## _round(const void *src, sph_u64 *state) \
+{ \
+	LVARS \
+	int r; \
+ \
+	READ_DATA; /* n0..n7 = input words decoded from src */ \
+	READ_STATE; /* h0..h7 = state[0..7] */ \
+	ROUND0; /* n<x> ^= h<x> */ \
+	for (r = 0; r < 10; r ++) { /* ten rounds, consuming type_RC[0..9] */ \
+		DECL8(tmp); \
+ \
+		ROUND_KSCHED(type ## _T, h, tmp, type ## _RC[r]); /* tmp = next key words derived from h and the round constant */ \
+		TRANSFER(h, tmp); \
+		ROUND_WENC(type ## _T, n, h, tmp); /* tmp = round applied to n, XORed with the new key h */ \
+		TRANSFER(n, tmp); \
+	} \
+	UPDATE_STATE; /* state[x] ^= n<x> ^ original input word x */ \
+}
+
+ROUND_FUN(whirlpool, plain) /* whirlpool_round: plain_T*, plain_RC */
+ROUND_FUN(whirlpool0, old0) /* whirlpool0_round: old0_T*, old0_RC */
+ROUND_FUN(whirlpool1, old1) /* whirlpool1_round: old1_T*, old1_RC */
+
+/*
+ * We want big-endian encoding of the message length, over 256 bits. BE64
+ * triggers that. However, our block length is 512 bits, not 1024 bits.
+ * Internally, our encoding/decoding is little-endian, which is not a
+ * problem here since we also deactivate output in md_helper.c.
+ */
+#define BE64   1
+#define SVAL   sc->state
+#define BLEN   64U /* 64 bytes, i.e. the 512-bit block length mentioned above */
+#define PLW4   1
+
+#define RFUN   whirlpool_round /* round function generated by ROUND_FUN(whirlpool, plain) */
+#define HASH   whirlpool
+#include "md_helper.c" /* NOTE(review): presumably emits the whirlpool_* helpers such as whirlpool_close -- confirm in md_helper.c */
+#undef RFUN
+#undef HASH
+
+#define RFUN   whirlpool0_round /* round function generated by ROUND_FUN(whirlpool0, old0) */
+#define HASH   whirlpool0
+#include "md_helper.c" /* second inclusion, for the whirlpool0 variant */
+#undef RFUN
+#undef HASH
+
+#define RFUN   whirlpool1_round /* round function generated by ROUND_FUN(whirlpool1, old1) */
+#define HASH   whirlpool1
+#include "md_helper.c" /* third inclusion, for the whirlpool1 variant */
+#undef RFUN
+#undef HASH
+
+#define MAKE_CLOSE(name) /* defines sph_<name>_close(cc, dst): finish the hash, write 8 state words (64 bytes) to dst, re-initialise cc */ \
+void \
+sph_ ## name ## _close(void *cc, void *dst) \
+{ \
+	sph_ ## name ## _context *sc; \
+	int i; \
+ \
+	name ## _close(cc, dst, 0); /* NOTE(review): the trailing 0 presumably asks the helper to emit no output itself -- confirm in md_helper.c */ \
+	sc = cc; \
+	for (i = 0; i < 8; i ++) /* state word i goes to dst + 8*i through sph_enc64le */ \
+		sph_enc64le((unsigned char *)dst + 8 * i, sc->state[i]); \
+	sph_ ## name ## _init(cc); /* reset the context after the digest has been written */ \
+}
+
+MAKE_CLOSE(whirlpool) /* sph_whirlpool_close */
+MAKE_CLOSE(whirlpool0) /* sph_whirlpool0_close */
+MAKE_CLOSE(whirlpool1) /* sph_whirlpool1_close */
+
+#endif
diff --git a/util.c b/util.c
index 451aaed484..38c237a121 100644
--- a/util.c
+++ b/util.c
@@ -7,7 +7,7 @@
  * Software Foundation; either version 2 of the License, or (at your option)
  * any later version.  See COPYING for more details.
  */
-
+ 
 #define _GNU_SOURCE
 #include "cpuminer-config.h"
 
@@ -74,21 +74,23 @@ void applog(int prio, const char *fmt, ...)
 
 #ifdef HAVE_SYSLOG_H
 	if (use_syslog) {
-		va_list ap2;
+		va_list ap2, ap3;
 		char *buf;
 		int len;
 		
 		va_copy(ap2, ap);
+		va_copy(ap3, ap);
 		len = vsnprintf(NULL, 0, fmt, ap2) + 1;
 		va_end(ap2);
 		buf = alloca(len);
-		if (vsnprintf(buf, len, fmt, ap) >= 0)
+		if (vsnprintf(buf, len, fmt, ap3) >= 0)
 			syslog(prio, "%s", buf);
+		va_end(ap3);
 	}
 #else
 	if (0) {}
 #endif
-	else {
+	if (1) {
 		char *f;
 		int len;
 		time_t now;
@@ -296,6 +298,7 @@ static int sockopt_keepalive_cb(void *userdata, curl_socket_t fd,
 }
 #endif
 
+
 json_t *json_rpc_call(CURL *curl, const char *url,
 		      const char *userpass, const char *rpc_req,
 		      bool longpoll_scan, bool longpoll, int *curl_err)
@@ -449,6 +452,229 @@ json_t *json_rpc_call(CURL *curl, const char *url,
 	return NULL;
 }
 
+/* Copy JSON text, appending ".0" to bare integer literals longer than 4 characters that overflow strtoll()
+ * (or strtol()/INT_MAX without JSON_INTEGER_IS_LONG_LONG). Returns a calloc'd string to free(), or NULL. */
+static char *hack_json_numbers(const char *in)
+{
+	char *out;
+	int i, off, intoff;
+	bool in_str, in_int;
+
+	out =(char*) calloc(2 * strlen(in) + 1, 1); /* zero-filled, so out + intoff is always NUL-terminated */
+	if (!out)
+		return NULL;
+	off = intoff = 0;
+	in_str = in_int = false;
+	for (i = 0; in[i]; i++) {
+		char c = in[i];
+		if (c == '"') {
+			in_str = !in_str;
+		}
+		else if (c == '\\') { /* copy the backslash here; the escaped character is copied below */
+			out[off++] = c;
+			if (!in[++i])
+				break;
+		}
+		else if (!in_str && !in_int && isdigit((unsigned char)c)) { /* cast: isdigit() is undefined for negative char */
+			intoff = off;
+			in_int = true;
+		}
+		else if (in_int && !isdigit((unsigned char)c)) {
+			if (c != '.' && c != 'e' && c != 'E' && c != '+' && c != '-') {
+				in_int = false; /* c ends the number that started at out[intoff] */
+				if (off - intoff > 4) {
+					char *end;
+#if JSON_INTEGER_IS_LONG_LONG
+					errno = 0;
+					strtoll(out + intoff, &end, 10);
+					if (!*end && errno == ERANGE) {
+#else
+					long l;
+					errno = 0;
+					l = strtol(out + intoff, &end, 10);
+					if (!*end && (errno == ERANGE || l > INT_MAX)) {
+#endif
+						out[off++] = '.'; /* !*end: the literal was all digits, so ".0" turns it into a real */
+						out[off++] = '0';
+					}
+				}
+			}
+		}
+		out[off++] = in[i];
+	}
+	return out;
+}
+/* Send rpc_req as a JSON-RPC POST to url and return the decoded reply object (owned by the caller), or NULL on
+ * any failure. flags may carry JSON_RPC_LONGPOLL / JSON_RPC_QUIET_404; *curl_err (if non-NULL) gets the curl code. */
+json_t *json_rpc_call2(CURL *curl, const char *url,
+	const char *userpass, const char *rpc_req,
+	int *curl_err, int flags)
+{
+	json_t *val = NULL, *err_val, *res_val; /* val starts NULL so err_out can release it safely */
+	int rc;
+	long http_rc = 0;
+	struct data_buffer all_data = { 0 };
+	struct upload_buffer upload_data;
+	char *json_buf;
+	json_error_t err;
+	struct curl_slist *headers = NULL;
+	char len_hdr[64];
+	char curl_err_str[CURL_ERROR_SIZE] = { 0 }; /* stays a valid empty string if curl writes nothing */
+	long timeout = (flags & JSON_RPC_LONGPOLL) ? opt_timeout : 30;
+	struct header_info hi = { 0 };
+
+	/* it is assumed that 'curl' is freshly [re]initialized at this pt */
+
+	if (opt_protocol)
+		curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L); /* numeric options are read as long by libcurl */
+	curl_easy_setopt(curl, CURLOPT_URL, url);
+	if (opt_cert)
+		curl_easy_setopt(curl, CURLOPT_CAINFO, opt_cert);
+	curl_easy_setopt(curl, CURLOPT_ENCODING, "");
+	curl_easy_setopt(curl, CURLOPT_FAILONERROR, 1L);
+	curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);
+	curl_easy_setopt(curl, CURLOPT_TCP_NODELAY, 1L);
+	curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, all_data_cb);
+	curl_easy_setopt(curl, CURLOPT_WRITEDATA, &all_data);
+	curl_easy_setopt(curl, CURLOPT_READFUNCTION, upload_data_cb);
+	curl_easy_setopt(curl, CURLOPT_READDATA, &upload_data);
+#if LIBCURL_VERSION_NUM >= 0x071200
+	curl_easy_setopt(curl, CURLOPT_SEEKFUNCTION, &seek_data_cb);
+	curl_easy_setopt(curl, CURLOPT_SEEKDATA, &upload_data);
+#endif
+	curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, curl_err_str);
+	if (opt_redirect)
+		curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
+	curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
+	curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, resp_hdr_cb);
+	curl_easy_setopt(curl, CURLOPT_HEADERDATA, &hi);
+	if (opt_proxy) {
+		curl_easy_setopt(curl, CURLOPT_PROXY, opt_proxy);
+		curl_easy_setopt(curl, CURLOPT_PROXYTYPE, opt_proxy_type);
+	}
+	if (userpass) {
+		curl_easy_setopt(curl, CURLOPT_USERPWD, userpass);
+		curl_easy_setopt(curl, CURLOPT_HTTPAUTH, CURLAUTH_BASIC);
+	}
+#if LIBCURL_VERSION_NUM >= 0x070f06
+	if (flags & JSON_RPC_LONGPOLL)
+		curl_easy_setopt(curl, CURLOPT_SOCKOPTFUNCTION, sockopt_keepalive_cb);
+#endif
+	curl_easy_setopt(curl, CURLOPT_POST, 1L);
+
+	if (opt_protocol)
+		applog(LOG_DEBUG, "JSON protocol request:\n%s\n", rpc_req);
+
+	upload_data.buf = rpc_req;
+	upload_data.len = strlen(rpc_req);
+	upload_data.pos = 0;
+	sprintf(len_hdr, "Content-Length: %lu",
+		(unsigned long)upload_data.len);
+	headers = curl_slist_append(headers, "Content-Type: application/json");
+	headers = curl_slist_append(headers, len_hdr);
+	headers = curl_slist_append(headers, "User-Agent: " USER_AGENT);
+	headers = curl_slist_append(headers, "X-Mining-Extensions: midstate");
+	headers = curl_slist_append(headers, "Accept:"); /* disable Accept hdr*/
+	headers = curl_slist_append(headers, "Expect:"); /* disable Expect hdr*/
+	curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
+	rc = curl_easy_perform(curl);
+	if (curl_err != NULL)
+		*curl_err = rc;
+	if (rc) {
+		curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_rc);
+		if (!((flags & JSON_RPC_LONGPOLL) && rc == CURLE_OPERATION_TIMEDOUT) &&
+			!((flags & JSON_RPC_QUIET_404) && http_rc == 404))
+			applog(LOG_ERR, "HTTP request failed: %s", curl_err_str);
+		if (curl_err && (flags & JSON_RPC_QUIET_404) && http_rc == 404)
+			*curl_err = CURLE_OK;
+		goto err_out;
+	}
+
+	/* If X-Stratum was found, activate Stratum */
+	if (want_stratum && hi.stratum_url &&
+		!strncasecmp(hi.stratum_url, "stratum+tcp://", 14)) {
+		have_stratum = true;
+		tq_push(thr_info[stratum_thr_id].q, hi.stratum_url);
+		hi.stratum_url = NULL;
+	}
+
+	/* If X-Long-Polling was found, activate long polling */
+	if (!have_longpoll && want_longpoll && hi.lp_path && !have_gbt &&
+		allow_getwork && !have_stratum) {
+		have_longpoll = true;
+		tq_push(thr_info[longpoll_thr_id].q, hi.lp_path);
+		hi.lp_path = NULL;
+	}
+
+	if (!all_data.buf) {
+		applog(LOG_ERR, "Empty data received in json_rpc_call.");
+		goto err_out;
+	}
+
+	json_buf = hack_json_numbers((const char*)all_data.buf);
+	if (!json_buf) { /* allocation failed: do not hand NULL to the JSON parser */
+		applog(LOG_ERR, "JSON decode failed: out of memory");
+		goto err_out;
+	}
+	errno = 0; /* needed for Jansson < 2.1 */
+	val = JSON_LOADS(json_buf, &err);
+	free(json_buf);
+	if (!val) {
+		applog(LOG_ERR, "JSON decode failed(%d): %s", err.line, err.text);
+		goto err_out;
+	}
+	if (opt_protocol) {
+		char *s = json_dumps(val, JSON_INDENT(3));
+		applog(LOG_DEBUG, "JSON protocol response:\n%s", s);
+		free(s);
+	}
+
+	/* JSON-RPC valid response returns a 'result' and a null 'error'. */
+	res_val = json_object_get(val, "result");
+	err_val = json_object_get(val, "error");
+	if (!res_val || (err_val && !json_is_null(err_val))) {
+		char *s;
+		if (err_val)
+			s = json_dumps(err_val, JSON_INDENT(3));
+		else
+			s = strdup("(unknown reason)");
+		applog(LOG_ERR, "JSON-RPC call failed: %s", s);
+		free(s);
+		goto err_out;
+	}
+
+	if (hi.reason)
+		json_object_set_new(val, "reject-reason", json_string(hi.reason));
+
+	free(hi.lp_path); /* header strings not handed to tq_push are still ours; pushed ones were set to NULL */
+	free(hi.reason); /* json_string() above stored its own copy */
+	free(hi.stratum_url);
+	databuf_free(&all_data);
+	curl_slist_free_all(headers);
+	curl_easy_reset(curl);
+	return val;
+
+err_out:
+	if (val) /* parsed reply that turned out to be a JSON-RPC error */
+		json_decref(val);
+	free(hi.lp_path);
+	free(hi.reason);
+	free(hi.stratum_url);
+	databuf_free(&all_data);
+	curl_slist_free_all(headers);
+	curl_easy_reset(curl);
+	return NULL;
+}
+/* Write the lowercase hex encoding of the len bytes at p into s. s needs room for 2 * len + 1 characters
+ * because every sprintf() also stores a terminating NUL. Nothing is written when len is 0. */
+void abin2hex(char *s, const unsigned char *p, size_t len)
+{
+	size_t i; /* same type as len: no signed/unsigned comparison and no int overflow in i * 2 */
+	for (i = 0; i < len; i++)
+		sprintf(s + (i * 2), "%02x", (unsigned int) p[i]);
+}
+
+
 char *bin2hex(const unsigned char *p, size_t len)
 {
 	unsigned int i;
@@ -489,6 +715,140 @@ bool hex2bin(unsigned char *p, const char *hexstr, size_t len)
 	return (len == 0 && *hexstr == 0) ? true : false;
 }
 
+int varint_encode(unsigned char *p, uint64_t n)
+{
+	/* Encodes n at p in a variable-length form and returns the number of bytes
+	 * written: 1 (n < 0xfd, the value itself), 3 (0xfd prefix + 2 bytes), 5 (0xfe
+	 * prefix + 4 bytes) or 9 (0xff prefix + 8 bytes). */
+	unsigned char *dst = p + 1;
+	int width, k;
+
+	if (n < 0xfd) {
+		*p = n;
+		return 1;
+	}
+	if (n <= 0xffff) {
+		*p = 0xfd;
+		width = 2;
+	} else if (n <= 0xffffffff) {
+		*p = 0xfe;
+		width = 4;
+	} else {
+		*p = 0xff;
+		width = 8;
+	}
+
+	/* payload bytes follow the prefix, least significant byte first */
+	for (k = 0; k < width; k++, n >>= 8)
+		dst[k] = n & 0xff;
+	return width + 1;
+}
+
+static const char b58digits[] = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz";
+/* Decode base58 string b58 into exactly binsz big-endian bytes at bin; false on bad digit, overflow, binsz 0 or OOM. */
+static bool b58dec(unsigned char *bin, size_t binsz, const char *b58)
+{
+	size_t i, j;
+	uint64_t t;
+	uint32_t c;
+	uint32_t *outi;
+	size_t outisz = (binsz + 3) / 4;
+	int rem = binsz % 4;
+	/* bits of outi[0] above the requested size; none when binsz is a multiple of 4 */
+	uint32_t remmask = rem ? 0xffffffff << (8 * rem) : 0;
+	size_t b58sz = strlen(b58);
+	bool rc = false;
+
+	outi = outisz ? (uint32_t*) calloc(outisz, sizeof(*outi)) : NULL;
+	if (!outi) /* nothing to decode into, or allocation failed */
+		return false;
+	for (i = 0; i < b58sz; ++i) {
+		for (c = 0; b58digits[c] != b58[i]; c++) /* c becomes the digit's value */
+			if (!b58digits[c])
+				goto out;
+		for (j = outisz; j--;) { /* outi = outi * 58 + c, carrying through c */
+			t = (uint64_t)outi[j] * 58 + c;
+			c = t >> 32;
+			outi[j] = t & 0xffffffff;
+		}
+		if (c || outi[0] & remmask) /* value no longer fits in binsz bytes */
+			goto out;
+	}
+	j = 0;
+	switch (rem) {
+	case 3:
+		*(bin++) = (outi[0] >> 16) & 0xff; /* fall through */
+	case 2:
+		*(bin++) = (outi[0] >> 8) & 0xff; /* fall through */
+	case 1:
+		*(bin++) = outi[0] & 0xff;
+		++j; /* outi[0] held only the partial leading word */
+	default:
+		break;
+	}
+	for (; j < outisz; ++j) {
+		be32enc((uint32_t *)bin, outi[j]);
+		bin += sizeof(uint32_t);
+	}
+	rc = true;
+out:
+	free(outi);
+	return rc;
+}
+/* Check a decoded base58 address: returns bin[0] if valid, -1 on checksum mismatch, -3 on a zero-prefix mismatch. */
+static int b58check(unsigned char *bin, size_t binsz, const char *b58)
+{
+	unsigned char digest[32];
+	int n = 0;
+
+	/* the trailing 4 bytes of bin must match the start of sha256d()'s output for the bytes before them */
+	sha256d(digest, bin, binsz - 4);
+	if (memcmp(&bin[binsz - 4], digest, 4) != 0)
+		return -1;
+	/* Leading zero bytes of bin must pair up exactly with leading '1' characters of b58. Done AFTER
+	 * the checksum test (to avoid possibility of accessing the string beyond the end). */
+	while (bin[n] == '\0' && b58[n] == '1')
+		++n;
+	if (bin[n] == '\0' || b58[n] == '1')
+		return -3;
+	return bin[0];
+}
+/* Build the output script for base58 address addr; returns the script length (23 or 25), or 0 if addr is invalid. */
+size_t address_to_script(unsigned char *out, size_t outsz, const char *addr)
+{
+	unsigned char decoded[25];
+	int version;
+	bool is_script_hash;
+	size_t need;
+
+	if (!b58dec(decoded, sizeof(decoded), addr))
+		return 0;
+	version = b58check(decoded, sizeof(decoded), addr);
+	if (version < 0)
+		return 0;
+
+	/* 5 = Bitcoin script hash, 196 = Testnet script hash; every other version gets the 25-byte form */
+	is_script_hash = (version == 5 || version == 196);
+	need = is_script_hash ? 23 : 25;
+	if (outsz < need)
+		return need;  /* buffer too small: report the required size, write nothing */
+	if (is_script_hash) {
+		out[0] = 0xa9;  /* OP_HASH160 */
+		out[1] = 0x14;  /* push 20 bytes */
+		memcpy(&out[2], &decoded[1], 20);
+		out[22] = 0x87;  /* OP_EQUAL */
+	} else {
+		out[0] = 0x76;  /* OP_DUP */
+		out[1] = 0xa9;  /* OP_HASH160 */
+		out[2] = 0x14;  /* push 20 bytes */
+		memcpy(&out[3], &decoded[1], 20);
+		out[23] = 0x88;  /* OP_EQUALVERIFY */
+		out[24] = 0xac;  /* OP_CHECKSIG */
+	}
+	return need;
+}
+
+
 /* Subtract the `struct timeval' values X and Y,
    storing the result in RESULT.
    Return 1 if the difference is negative, otherwise 0.  */
@@ -981,6 +1341,54 @@ bool stratum_authorize(struct stratum_ctx *sctx, const char *user, const char *p
 
 	return ret;
 }
+static bool stratum_notify_m7(struct stratum_ctx *sctx, json_t *params)
+{	/* Store an M7 "mining.notify" job from params into sctx->job; returns false if params are invalid. */
+	const char *job_id, *prevblock, *accroot, *merkleroot, *version, *ntime;
+	int height;
+	bool clean;
+	/* params: [0] job id, [1] prevblock, [2] accroot, [3] merkleroot, [4] height, [5] version, [6] ntime, [7] clean */
+	job_id = json_string_value(json_array_get(params, 0));
+	prevblock = json_string_value(json_array_get(params, 1));
+	accroot = json_string_value(json_array_get(params, 2));
+	merkleroot = json_string_value(json_array_get(params, 3));
+	height = json_integer_value(json_array_get(params, 4));
+	version = json_string_value(json_array_get(params, 5));
+	ntime = json_string_value(json_array_get(params, 6));
+	clean = json_is_true(json_array_get(params, 7));
+	/* reject missing strings, a zero height, or hex strings whose length is not 2 characters per expected byte */
+	if (!job_id || !prevblock || !accroot || !merkleroot || 
+		!version || !height || !ntime ||
+		strlen(prevblock) != 32*2 || 
+		strlen(accroot) != 32*2 || 
+		strlen(merkleroot) != 32*2 || 
+		strlen(ntime) != 8*2 || strlen(version) != 2*2) {
+		applog(LOG_ERR, "Stratum (M7) notify: invalid parameters");
+		return false;
+	}
+
+	pthread_mutex_lock(&sctx->work_lock);
+	/* a job id different from the stored one resets xnonce2 to xnonce2_size zero bytes */
+	if (!sctx->job.job_id || strcmp(sctx->job.job_id, job_id)) {
+		sctx->job.xnonce2 = (unsigned char *)realloc(sctx->job.xnonce2, sctx->xnonce2_size); /* NOTE(review): result is not checked before the memset */
+		memset(sctx->job.xnonce2, 0, sctx->xnonce2_size);
+	}
+	free(sctx->job.job_id);
+	sctx->job.job_id = strdup(job_id);
+	/* decode the hex fields into the job's binary buffers (32/32/32, 2 and 8 bytes) */
+	hex2bin(sctx->job.m7prevblock, prevblock, 32);
+	hex2bin(sctx->job.m7accroot, accroot, 32);
+	hex2bin(sctx->job.m7merkleroot, merkleroot, 32);
+	be64enc(sctx->job.m7height, height); /* presumably stores height as 8 big-endian bytes -- confirm be64enc */
+	hex2bin(sctx->job.m7version, version, 2);
+	hex2bin(sctx->job.m7ntime, ntime, 8);
+	sctx->job.clean = clean;
+	/* the difficulty held in next_diff takes effect with this job */
+	sctx->job.diff = sctx->next_diff;
+
+	pthread_mutex_unlock(&sctx->work_lock);
+
+	return true;
+}
 
 static bool stratum_notify(struct stratum_ctx *sctx, json_t *params)
 {
@@ -1177,11 +1585,70 @@ bool stratum_handle_method(struct stratum_ctx *sctx, const char *s)
 		goto out;
 	id = json_object_get(val, "id");
 	params = json_object_get(val, "params");
+	
+	if (!strcasecmp(method, "mining.notify")) {
+		ret = stratum_notify(sctx, params);
+		goto out;
+	}
+	
+	if (!strcasecmp(method, "mining.set_difficulty")) {
+		ret = stratum_set_difficulty(sctx, params);
+		goto out;
+	}
+	if (!strcasecmp(method, "client.reconnect")) {
+		ret = stratum_reconnect(sctx, params);
+		goto out;
+	}
+	if (!strcasecmp(method, "client.get_version")) {
+		ret = stratum_get_version(sctx, id);
+		goto out;
+	}
+	if (!strcasecmp(method, "client.show_message")) {
+		ret = stratum_show_message(sctx, id, params);
+		goto out;
+	}
+
+out:
+	if (val)
+		json_decref(val);
+
+	return ret;
+}
+
+bool stratum_handle_method_m7(struct stratum_ctx *sctx, const char *s)
+{
+	json_t *val, *id, *params;
+	json_error_t err;
+	const char *method;
+	bool ret = false;
 
+	val = JSON_LOADS(s, &err);
+	if (!val) {
+		applog(LOG_ERR, "JSON decode failed(%d): %s", err.line, err.text);
+		goto out;
+	}
+
+	method = json_string_value(json_object_get(val, "method"));
+	if (!method)
+		goto out;
+	id = json_object_get(val, "id");
+	params = json_object_get(val, "params");
+	/*
 	if (!strcasecmp(method, "mining.notify")) {
 		ret = stratum_notify(sctx, params);
 		goto out;
 	}
+	*/
+	if (!strcasecmp(method, "mining.notify")) {
+//		if (opt_algo == ALGO_M7) {
+			ret = stratum_notify_m7(sctx, params);
+//		} else {
+//			ret = stratum_notify(sctx, params);
+//		}
+		goto out;
+	}
+
+
 	if (!strcasecmp(method, "mining.set_difficulty")) {
 		ret = stratum_set_difficulty(sctx, params);
 		goto out;
@@ -1206,6 +1673,7 @@ bool stratum_handle_method(struct stratum_ctx *sctx, const char *s)
 	return ret;
 }
 
+
 struct thread_q *tq_new(void)
 {
 	struct thread_q *tq;
diff --git a/x11/cuda_x11_shavite512.cu b/x11/cuda_x11_shavite512.cu
index b3fd9258ed..95152e4b8a 100644
--- a/x11/cuda_x11_shavite512.cu
+++ b/x11/cuda_x11_shavite512.cu
@@ -1,4 +1,10 @@
 // aus heavy.cu
+#include <cuda.h>
+#include "cuda_runtime.h"
+#include "device_launch_parameters.h"
+
+#include <stdio.h>
+#include <memory.h>
 extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
 
 typedef unsigned char BitSequence;
@@ -13,6 +19,8 @@ typedef unsigned long long uint64_t;
 #define SPH_C32(x)    ((uint32_t)(x ## U))
 #define SPH_T32(x)    ((x) & SPH_C32(0xFFFFFFFF))
 
+ __constant__ uint32_t c_PaddedMessage80[32]; // padded message (80 bytes + padding)
+
 static __constant__ uint32_t d_ShaviteInitVector[16];
 static const uint32_t h_ShaviteInitVector[] = {
 	SPH_C32(0x72FCCDD8), SPH_C32(0x79CA4727), SPH_C32(0x128A077B), SPH_C32(0x40D55AEC),
@@ -23,6 +31,11 @@ static const uint32_t h_ShaviteInitVector[] = {
 
 #include "cuda_x11_aes.cu"
 
+static __device__ uint32_t cuda_swab32(uint32_t x)
+{	/* Byte-reverse x: selector 0x0123 maps result bytes 0..3 to source bytes 3,2,1,0 (second operand unused). */
+	return __byte_perm(x, 0, 0x0123);
+}
+
 static __device__ __forceinline__ void AES_ROUND_NOKEY(
 	const uint32_t* __restrict__ sharedMemory,
 	uint32_t &x0, uint32_t &x1, uint32_t &x2, uint32_t &x3)
@@ -54,7 +67,7 @@ static __device__ __forceinline__ void KEY_EXPAND_ELT(
 }
 
 static __device__ void
-c512(const uint32_t* sharedMemory, uint32_t *state, uint32_t *msg)
+c512(const uint32_t* sharedMemory, uint32_t *state, uint32_t *msg, uint32_t count)
 {
 	uint32_t p0, p1, p2, p3, p4, p5, p6, p7;
 	uint32_t p8, p9, pA, pB, pC, pD, pE, pF;
@@ -63,7 +76,7 @@ c512(const uint32_t* sharedMemory, uint32_t *state, uint32_t *msg)
 	uint32_t rk08, rk09, rk0A, rk0B, rk0C, rk0D, rk0E, rk0F;
 	uint32_t rk10, rk11, rk12, rk13, rk14, rk15, rk16, rk17;
 	uint32_t rk18, rk19, rk1A, rk1B, rk1C, rk1D, rk1E, rk1F;
-	const uint32_t counter = 512;
+	const uint32_t counter = count;
 
 	p0 = state[0x0];
 	p1 = state[0x1];
@@ -1299,6 +1312,48 @@ c512(const uint32_t* sharedMemory, uint32_t *state, uint32_t *msg)
 }
 
 
+// SHAvite-512 of the 80-byte block held in c_PaddedMessage80, one thread per nonce
+// (nonce = startNounce + global thread index); writes 16 words per thread to outputHash.
+__global__ void x11_shavite512_gpu_hash_80(int threads, uint32_t startNounce, void *outputHash)
+{
+	__shared__ uint32_t sharedMemory[1024];
+
+	// Called by every thread of the block, before the bounds check below.
+	aes_gpu_init(sharedMemory);
+
+	const int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	if (thread >= threads)
+		return; // grid tail: nothing to hash for this thread
+
+	const uint32_t nounce = startNounce + thread;
+
+	// copy the initial state
+	uint32_t state[16];
+#pragma unroll 16
+	for (int w = 0; w < 16; w++)
+		state[w] = d_ShaviteInitVector[w];
+
+	// Message block: the staged header words first, then the per-thread words on top.
+	uint32_t msg[32];
+#pragma unroll 32
+	for (int w = 0; w < 32; w++)
+		msg[w] = c_PaddedMessage80[w];
+	msg[19] = cuda_swab32(nounce); // word 19 = bytes 76..79 of the block
+	// Words 20/27/31 look like the padding marker, the 640-bit (0x280) message length and the
+	// 512-bit (0x200) digest size -- TODO confirm against the SHAvite-3 padding rules.
+	msg[20] = 0x80;
+	msg[27] = 0x2800000;
+	msg[31] = 0x2000000;
+
+	// 640 is handed to c512 as its counter value
+	c512(sharedMemory, state, msg, 640);
+
+	// Each thread owns 16 consecutive 32-bit words (64 bytes) of outputHash.
+	uint32_t *outHash = (uint32_t *)outputHash + 16 * thread;
+#pragma unroll 16
+	for (int w = 0; w < 16; w++)
+		outHash[w] = state[w];
+}
 // Die Hash-Funktion
 __global__ void x11_shavite512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector)
 {
@@ -1306,6 +1361,7 @@ __global__ void x11_shavite512_gpu_hash_64(int threads, uint32_t startNounce, ui
 
 	aes_gpu_init(sharedMemory);
 
+
     int thread = (blockDim.x * blockIdx.x + threadIdx.x);
     if (thread < threads)
     {
@@ -1341,12 +1397,12 @@ __global__ void x11_shavite512_gpu_hash_64(int threads, uint32_t startNounce, ui
 		msg[30] = 0;
 		msg[31] = 0x02000000;
 
-		c512(sharedMemory, state, msg);
+		c512(sharedMemory, state, msg, 512);
 
 #pragma unroll 16
 		for(int i=0;i<16;i++)
 			Hash[i] = state[i];
-    }
+    } // thread < threads
 }
 
 
@@ -1369,10 +1425,34 @@ __host__ void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t start
     dim3 grid((threads + threadsperblock-1)/threadsperblock);
     dim3 block(threadsperblock);
 
-    // Gr��e des dynamischen Shared Memory Bereichs
     size_t shared_size = 0;
 
     x11_shavite512_gpu_hash_64<<<grid, block, shared_size>>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector);
     MyStreamSynchronize(NULL, order, thr_id);
 }
 
+__host__ void x11_shavite512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_outputHash, int order)
+{
+	// Host-side launcher for x11_shavite512_gpu_hash_80: `threads` GPU threads in blocks of 256,
+	// no dynamic shared memory, default stream; followed by MyStreamSynchronize(NULL, order, thr_id).
+	const int tpb = 256;
+	const size_t dyn_smem = 0;
+
+	// work out how many thread blocks are needed (rounded up)
+	const dim3 block(tpb);
+	const dim3 grid((threads + tpb - 1) / tpb);
+
+	x11_shavite512_gpu_hash_80<<<grid, block, dyn_smem>>>(threads, startNounce, d_outputHash);
+	MyStreamSynchronize(NULL, order, thr_id);
+}
+__host__ void x11_shavite512_setBlock_80(void *pdata)
+{
+	// Provide the message in a zero-padded 128-byte buffer and upload it to c_PaddedMessage80;
+	// only the correct nonce (from byte 76 on) still has to be inserted afterwards.
+	unsigned char staged[128] = { 0 };
+
+	// pdata must point to at least 80 readable bytes
+	memcpy(staged, pdata, 80);
+	cudaMemcpyToSymbol(c_PaddedMessage80, staged, sizeof(staged), 0, cudaMemcpyHostToDevice);
+}
+
diff --git a/x13/cuda_haval512.cu b/x13/cuda_haval512.cu
new file mode 100644
index 0000000000..8e6882a982
--- /dev/null
+++ b/x13/cuda_haval512.cu
@@ -0,0 +1,553 @@
+/*
+ * Haval-512
+ * 
+ * Built on cbuchner1's implementation, actual hashing code
+ * heavily based on phm's sgminer
+ *
+ */
+
+/*
+ * Haval-512 kernel implementation.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2014  djm34
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author   phm <phm@inbox.com>
+ */
+#include <cuda.h>
+#include "cuda_runtime.h"
+#include "device_launch_parameters.h"
+
+#include <stdio.h>
+#include <stdint.h>
+#include <memory.h>
+
+
+#define USE_SHARED 1
+
+#define SPH_C64(x)    ((uint64_t)(x ## ULL))
+#define SPH_C32(x)    ((uint32_t)(x ## U))
+#define SPH_T32(x)    ((x) & SPH_C32(0xFFFFFFFF))
+
+#include "cuda_helper.h"
+
+
+
+
+// from heavy.cu
+extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
+
+__constant__ uint32_t c_PaddedMessage80[32];
+static __constant__ uint32_t initVector[8];
+
+static const uint32_t c_initVector[8] = {
+	SPH_C32(0x243F6A88),
+	SPH_C32(0x85A308D3),
+	SPH_C32(0x13198A2E),
+	SPH_C32(0x03707344),
+	SPH_C32(0xA4093822),
+	SPH_C32(0x299F31D0),
+	SPH_C32(0x082EFA98),
+	SPH_C32(0xEC4E6C89)
+};
+
+#define PASS1(n, in)   { \
+   STEP(n, 1, s7, s6, s5, s4, s3, s2, s1, s0, in[ 0], SPH_C32(0x00000000)); \
+   STEP(n, 1, s6, s5, s4, s3, s2, s1, s0, s7, in[ 1], SPH_C32(0x00000000)); \
+   STEP(n, 1, s5, s4, s3, s2, s1, s0, s7, s6, in[ 2], SPH_C32(0x00000000)); \
+   STEP(n, 1, s4, s3, s2, s1, s0, s7, s6, s5, in[ 3], SPH_C32(0x00000000)); \
+   STEP(n, 1, s3, s2, s1, s0, s7, s6, s5, s4, in[ 4], SPH_C32(0x00000000)); \
+   STEP(n, 1, s2, s1, s0, s7, s6, s5, s4, s3, in[ 5], SPH_C32(0x00000000)); \
+   STEP(n, 1, s1, s0, s7, s6, s5, s4, s3, s2, in[ 6], SPH_C32(0x00000000)); \
+   STEP(n, 1, s0, s7, s6, s5, s4, s3, s2, s1, in[ 7], SPH_C32(0x00000000)); \
+ \
+   STEP(n, 1, s7, s6, s5, s4, s3, s2, s1, s0, in[ 8], SPH_C32(0x00000000)); \
+   STEP(n, 1, s6, s5, s4, s3, s2, s1, s0, s7, in[ 9], SPH_C32(0x00000000)); \
+   STEP(n, 1, s5, s4, s3, s2, s1, s0, s7, s6, in[10], SPH_C32(0x00000000)); \
+   STEP(n, 1, s4, s3, s2, s1, s0, s7, s6, s5, in[11], SPH_C32(0x00000000)); \
+   STEP(n, 1, s3, s2, s1, s0, s7, s6, s5, s4, in[12], SPH_C32(0x00000000)); \
+   STEP(n, 1, s2, s1, s0, s7, s6, s5, s4, s3, in[13], SPH_C32(0x00000000)); \
+   STEP(n, 1, s1, s0, s7, s6, s5, s4, s3, s2, in[14], SPH_C32(0x00000000)); \
+   STEP(n, 1, s0, s7, s6, s5, s4, s3, s2, s1, in[15], SPH_C32(0x00000000)); \
+ \
+   STEP(n, 1, s7, s6, s5, s4, s3, s2, s1, s0, in[16], SPH_C32(0x00000000)); \
+   STEP(n, 1, s6, s5, s4, s3, s2, s1, s0, s7, in[17], SPH_C32(0x00000000)); \
+   STEP(n, 1, s5, s4, s3, s2, s1, s0, s7, s6, in[18], SPH_C32(0x00000000)); \
+   STEP(n, 1, s4, s3, s2, s1, s0, s7, s6, s5, in[19], SPH_C32(0x00000000)); \
+   STEP(n, 1, s3, s2, s1, s0, s7, s6, s5, s4, in[20], SPH_C32(0x00000000)); \
+   STEP(n, 1, s2, s1, s0, s7, s6, s5, s4, s3, in[21], SPH_C32(0x00000000)); \
+   STEP(n, 1, s1, s0, s7, s6, s5, s4, s3, s2, in[22], SPH_C32(0x00000000)); \
+   STEP(n, 1, s0, s7, s6, s5, s4, s3, s2, s1, in[23], SPH_C32(0x00000000)); \
+ \
+   STEP(n, 1, s7, s6, s5, s4, s3, s2, s1, s0, in[24], SPH_C32(0x00000000)); \
+   STEP(n, 1, s6, s5, s4, s3, s2, s1, s0, s7, in[25], SPH_C32(0x00000000)); \
+   STEP(n, 1, s5, s4, s3, s2, s1, s0, s7, s6, in[26], SPH_C32(0x00000000)); \
+   STEP(n, 1, s4, s3, s2, s1, s0, s7, s6, s5, in[27], SPH_C32(0x00000000)); \
+   STEP(n, 1, s3, s2, s1, s0, s7, s6, s5, s4, in[28], SPH_C32(0x00000000)); \
+   STEP(n, 1, s2, s1, s0, s7, s6, s5, s4, s3, in[29], SPH_C32(0x00000000)); \
+   STEP(n, 1, s1, s0, s7, s6, s5, s4, s3, s2, in[30], SPH_C32(0x00000000)); \
+   STEP(n, 1, s0, s7, s6, s5, s4, s3, s2, s1, in[31], SPH_C32(0x00000000)); \
+	} 
+
+#define PASS2(n, in)    { \
+   STEP(n, 2, s7, s6, s5, s4, s3, s2, s1, s0, in[ 5], SPH_C32(0x452821E6)); \
+   STEP(n, 2, s6, s5, s4, s3, s2, s1, s0, s7, in[14], SPH_C32(0x38D01377)); \
+   STEP(n, 2, s5, s4, s3, s2, s1, s0, s7, s6, in[26], SPH_C32(0xBE5466CF)); \
+   STEP(n, 2, s4, s3, s2, s1, s0, s7, s6, s5, in[18], SPH_C32(0x34E90C6C)); \
+   STEP(n, 2, s3, s2, s1, s0, s7, s6, s5, s4, in[11], SPH_C32(0xC0AC29B7)); \
+   STEP(n, 2, s2, s1, s0, s7, s6, s5, s4, s3, in[28], SPH_C32(0xC97C50DD)); \
+   STEP(n, 2, s1, s0, s7, s6, s5, s4, s3, s2, in[ 7], SPH_C32(0x3F84D5B5)); \
+   STEP(n, 2, s0, s7, s6, s5, s4, s3, s2, s1, in[16], SPH_C32(0xB5470917)); \
+ \
+   STEP(n, 2, s7, s6, s5, s4, s3, s2, s1, s0, in[ 0], SPH_C32(0x9216D5D9)); \
+   STEP(n, 2, s6, s5, s4, s3, s2, s1, s0, s7, in[23], SPH_C32(0x8979FB1B)); \
+   STEP(n, 2, s5, s4, s3, s2, s1, s0, s7, s6, in[20], SPH_C32(0xD1310BA6)); \
+   STEP(n, 2, s4, s3, s2, s1, s0, s7, s6, s5, in[22], SPH_C32(0x98DFB5AC)); \
+   STEP(n, 2, s3, s2, s1, s0, s7, s6, s5, s4, in[ 1], SPH_C32(0x2FFD72DB)); \
+   STEP(n, 2, s2, s1, s0, s7, s6, s5, s4, s3, in[10], SPH_C32(0xD01ADFB7)); \
+   STEP(n, 2, s1, s0, s7, s6, s5, s4, s3, s2, in[ 4], SPH_C32(0xB8E1AFED)); \
+   STEP(n, 2, s0, s7, s6, s5, s4, s3, s2, s1, in[ 8], SPH_C32(0x6A267E96)); \
+ \
+   STEP(n, 2, s7, s6, s5, s4, s3, s2, s1, s0, in[30], SPH_C32(0xBA7C9045)); \
+   STEP(n, 2, s6, s5, s4, s3, s2, s1, s0, s7, in[ 3], SPH_C32(0xF12C7F99)); \
+   STEP(n, 2, s5, s4, s3, s2, s1, s0, s7, s6, in[21], SPH_C32(0x24A19947)); \
+   STEP(n, 2, s4, s3, s2, s1, s0, s7, s6, s5, in[ 9], SPH_C32(0xB3916CF7)); \
+   STEP(n, 2, s3, s2, s1, s0, s7, s6, s5, s4, in[17], SPH_C32(0x0801F2E2)); \
+   STEP(n, 2, s2, s1, s0, s7, s6, s5, s4, s3, in[24], SPH_C32(0x858EFC16)); \
+   STEP(n, 2, s1, s0, s7, s6, s5, s4, s3, s2, in[29], SPH_C32(0x636920D8)); \
+   STEP(n, 2, s0, s7, s6, s5, s4, s3, s2, s1, in[ 6], SPH_C32(0x71574E69)); \
+ \
+   STEP(n, 2, s7, s6, s5, s4, s3, s2, s1, s0, in[19], SPH_C32(0xA458FEA3)); \
+   STEP(n, 2, s6, s5, s4, s3, s2, s1, s0, s7, in[12], SPH_C32(0xF4933D7E)); \
+   STEP(n, 2, s5, s4, s3, s2, s1, s0, s7, s6, in[15], SPH_C32(0x0D95748F)); \
+   STEP(n, 2, s4, s3, s2, s1, s0, s7, s6, s5, in[13], SPH_C32(0x728EB658)); \
+   STEP(n, 2, s3, s2, s1, s0, s7, s6, s5, s4, in[ 2], SPH_C32(0x718BCD58)); \
+   STEP(n, 2, s2, s1, s0, s7, s6, s5, s4, s3, in[25], SPH_C32(0x82154AEE)); \
+   STEP(n, 2, s1, s0, s7, s6, s5, s4, s3, s2, in[31], SPH_C32(0x7B54A41D)); \
+   STEP(n, 2, s0, s7, s6, s5, s4, s3, s2, s1, in[27], SPH_C32(0xC25A59B5)); \
+	} 
+
+#define PASS3(n, in)    { \
+   STEP(n, 3, s7, s6, s5, s4, s3, s2, s1, s0, in[19], SPH_C32(0x9C30D539)); \
+   STEP(n, 3, s6, s5, s4, s3, s2, s1, s0, s7, in[ 9], SPH_C32(0x2AF26013)); \
+   STEP(n, 3, s5, s4, s3, s2, s1, s0, s7, s6, in[ 4], SPH_C32(0xC5D1B023)); \
+   STEP(n, 3, s4, s3, s2, s1, s0, s7, s6, s5, in[20], SPH_C32(0x286085F0)); \
+   STEP(n, 3, s3, s2, s1, s0, s7, s6, s5, s4, in[28], SPH_C32(0xCA417918)); \
+   STEP(n, 3, s2, s1, s0, s7, s6, s5, s4, s3, in[17], SPH_C32(0xB8DB38EF)); \
+   STEP(n, 3, s1, s0, s7, s6, s5, s4, s3, s2, in[ 8], SPH_C32(0x8E79DCB0)); \
+   STEP(n, 3, s0, s7, s6, s5, s4, s3, s2, s1, in[22], SPH_C32(0x603A180E)); \
+ \
+   STEP(n, 3, s7, s6, s5, s4, s3, s2, s1, s0, in[29], SPH_C32(0x6C9E0E8B)); \
+   STEP(n, 3, s6, s5, s4, s3, s2, s1, s0, s7, in[14], SPH_C32(0xB01E8A3E)); \
+   STEP(n, 3, s5, s4, s3, s2, s1, s0, s7, s6, in[25], SPH_C32(0xD71577C1)); \
+   STEP(n, 3, s4, s3, s2, s1, s0, s7, s6, s5, in[12], SPH_C32(0xBD314B27)); \
+   STEP(n, 3, s3, s2, s1, s0, s7, s6, s5, s4, in[24], SPH_C32(0x78AF2FDA)); \
+   STEP(n, 3, s2, s1, s0, s7, s6, s5, s4, s3, in[30], SPH_C32(0x55605C60)); \
+   STEP(n, 3, s1, s0, s7, s6, s5, s4, s3, s2, in[16], SPH_C32(0xE65525F3)); \
+   STEP(n, 3, s0, s7, s6, s5, s4, s3, s2, s1, in[26], SPH_C32(0xAA55AB94)); \
+ \
+   STEP(n, 3, s7, s6, s5, s4, s3, s2, s1, s0, in[31], SPH_C32(0x57489862)); \
+   STEP(n, 3, s6, s5, s4, s3, s2, s1, s0, s7, in[15], SPH_C32(0x63E81440)); \
+   STEP(n, 3, s5, s4, s3, s2, s1, s0, s7, s6, in[ 7], SPH_C32(0x55CA396A)); \
+   STEP(n, 3, s4, s3, s2, s1, s0, s7, s6, s5, in[ 3], SPH_C32(0x2AAB10B6)); \
+   STEP(n, 3, s3, s2, s1, s0, s7, s6, s5, s4, in[ 1], SPH_C32(0xB4CC5C34)); \
+   STEP(n, 3, s2, s1, s0, s7, s6, s5, s4, s3, in[ 0], SPH_C32(0x1141E8CE)); \
+   STEP(n, 3, s1, s0, s7, s6, s5, s4, s3, s2, in[18], SPH_C32(0xA15486AF)); \
+   STEP(n, 3, s0, s7, s6, s5, s4, s3, s2, s1, in[27], SPH_C32(0x7C72E993)); \
+ \
+   STEP(n, 3, s7, s6, s5, s4, s3, s2, s1, s0, in[13], SPH_C32(0xB3EE1411)); \
+   STEP(n, 3, s6, s5, s4, s3, s2, s1, s0, s7, in[ 6], SPH_C32(0x636FBC2A)); \
+   STEP(n, 3, s5, s4, s3, s2, s1, s0, s7, s6, in[21], SPH_C32(0x2BA9C55D)); \
+   STEP(n, 3, s4, s3, s2, s1, s0, s7, s6, s5, in[10], SPH_C32(0x741831F6)); \
+   STEP(n, 3, s3, s2, s1, s0, s7, s6, s5, s4, in[23], SPH_C32(0xCE5C3E16)); \
+   STEP(n, 3, s2, s1, s0, s7, s6, s5, s4, s3, in[11], SPH_C32(0x9B87931E)); \
+   STEP(n, 3, s1, s0, s7, s6, s5, s4, s3, s2, in[ 5], SPH_C32(0xAFD6BA33)); \
+   STEP(n, 3, s0, s7, s6, s5, s4, s3, s2, s1, in[ 2], SPH_C32(0x6C24CF5C)); \
+	} 
+
+#define PASS4(n, in)  { \
+   STEP(n, 4, s7, s6, s5, s4, s3, s2, s1, s0, in[24], SPH_C32(0x7A325381)); \
+   STEP(n, 4, s6, s5, s4, s3, s2, s1, s0, s7, in[ 4], SPH_C32(0x28958677)); \
+   STEP(n, 4, s5, s4, s3, s2, s1, s0, s7, s6, in[ 0], SPH_C32(0x3B8F4898)); \
+   STEP(n, 4, s4, s3, s2, s1, s0, s7, s6, s5, in[14], SPH_C32(0x6B4BB9AF)); \
+   STEP(n, 4, s3, s2, s1, s0, s7, s6, s5, s4, in[ 2], SPH_C32(0xC4BFE81B)); \
+   STEP(n, 4, s2, s1, s0, s7, s6, s5, s4, s3, in[ 7], SPH_C32(0x66282193)); \
+   STEP(n, 4, s1, s0, s7, s6, s5, s4, s3, s2, in[28], SPH_C32(0x61D809CC)); \
+   STEP(n, 4, s0, s7, s6, s5, s4, s3, s2, s1, in[23], SPH_C32(0xFB21A991)); \
+ \
+   STEP(n, 4, s7, s6, s5, s4, s3, s2, s1, s0, in[26], SPH_C32(0x487CAC60)); \
+   STEP(n, 4, s6, s5, s4, s3, s2, s1, s0, s7, in[ 6], SPH_C32(0x5DEC8032)); \
+   STEP(n, 4, s5, s4, s3, s2, s1, s0, s7, s6, in[30], SPH_C32(0xEF845D5D)); \
+   STEP(n, 4, s4, s3, s2, s1, s0, s7, s6, s5, in[20], SPH_C32(0xE98575B1)); \
+   STEP(n, 4, s3, s2, s1, s0, s7, s6, s5, s4, in[18], SPH_C32(0xDC262302)); \
+   STEP(n, 4, s2, s1, s0, s7, s6, s5, s4, s3, in[25], SPH_C32(0xEB651B88)); \
+   STEP(n, 4, s1, s0, s7, s6, s5, s4, s3, s2, in[19], SPH_C32(0x23893E81)); \
+   STEP(n, 4, s0, s7, s6, s5, s4, s3, s2, s1, in[ 3], SPH_C32(0xD396ACC5)); \
+ \
+   STEP(n, 4, s7, s6, s5, s4, s3, s2, s1, s0, in[22], SPH_C32(0x0F6D6FF3)); \
+   STEP(n, 4, s6, s5, s4, s3, s2, s1, s0, s7, in[11], SPH_C32(0x83F44239)); \
+   STEP(n, 4, s5, s4, s3, s2, s1, s0, s7, s6, in[31], SPH_C32(0x2E0B4482)); \
+   STEP(n, 4, s4, s3, s2, s1, s0, s7, s6, s5, in[21], SPH_C32(0xA4842004)); \
+   STEP(n, 4, s3, s2, s1, s0, s7, s6, s5, s4, in[ 8], SPH_C32(0x69C8F04A)); \
+   STEP(n, 4, s2, s1, s0, s7, s6, s5, s4, s3, in[27], SPH_C32(0x9E1F9B5E)); \
+   STEP(n, 4, s1, s0, s7, s6, s5, s4, s3, s2, in[12], SPH_C32(0x21C66842)); \
+   STEP(n, 4, s0, s7, s6, s5, s4, s3, s2, s1, in[ 9], SPH_C32(0xF6E96C9A)); \
+ \
+   STEP(n, 4, s7, s6, s5, s4, s3, s2, s1, s0, in[ 1], SPH_C32(0x670C9C61)); \
+   STEP(n, 4, s6, s5, s4, s3, s2, s1, s0, s7, in[29], SPH_C32(0xABD388F0)); \
+   STEP(n, 4, s5, s4, s3, s2, s1, s0, s7, s6, in[ 5], SPH_C32(0x6A51A0D2)); \
+   STEP(n, 4, s4, s3, s2, s1, s0, s7, s6, s5, in[15], SPH_C32(0xD8542F68)); \
+   STEP(n, 4, s3, s2, s1, s0, s7, s6, s5, s4, in[17], SPH_C32(0x960FA728)); \
+   STEP(n, 4, s2, s1, s0, s7, s6, s5, s4, s3, in[10], SPH_C32(0xAB5133A3)); \
+   STEP(n, 4, s1, s0, s7, s6, s5, s4, s3, s2, in[16], SPH_C32(0x6EEF0B6C)); \
+   STEP(n, 4, s0, s7, s6, s5, s4, s3, s2, s1, in[13], SPH_C32(0x137A3BE4)); \
+	}
+
+/*
+ * PASS5(n, in): the 32 STEP invocations tagged as pass 5.
+ *
+ * n  - forwarded to STEP as its first argument (the kernels in this file
+ *      pass 5); together with the pass index it selects the FP<n>_5
+ *      boolean function through token pasting inside STEP.
+ * in - indexable block of 32 message words; every index 0..31 is read
+ *      exactly once, in the fixed order listed below.
+ *
+ * The macro expects variables named s0..s7 to be in scope where it is
+ * expanded.  Successive steps rotate the register roles so that the word
+ * being replaced cycles s7, s6, ..., s0 (four times in total).  Each step
+ * also adds its own 32-bit constant.
+ */
+#define PASS5(n, in)    { \
+   STEP(n, 5, s7, s6, s5, s4, s3, s2, s1, s0, in[27], SPH_C32(0xBA3BF050)); \
+   STEP(n, 5, s6, s5, s4, s3, s2, s1, s0, s7, in[ 3], SPH_C32(0x7EFB2A98)); \
+   STEP(n, 5, s5, s4, s3, s2, s1, s0, s7, s6, in[21], SPH_C32(0xA1F1651D)); \
+   STEP(n, 5, s4, s3, s2, s1, s0, s7, s6, s5, in[26], SPH_C32(0x39AF0176)); \
+   STEP(n, 5, s3, s2, s1, s0, s7, s6, s5, s4, in[17], SPH_C32(0x66CA593E)); \
+   STEP(n, 5, s2, s1, s0, s7, s6, s5, s4, s3, in[11], SPH_C32(0x82430E88)); \
+   STEP(n, 5, s1, s0, s7, s6, s5, s4, s3, s2, in[20], SPH_C32(0x8CEE8619)); \
+   STEP(n, 5, s0, s7, s6, s5, s4, s3, s2, s1, in[29], SPH_C32(0x456F9FB4)); \
+ \
+   STEP(n, 5, s7, s6, s5, s4, s3, s2, s1, s0, in[19], SPH_C32(0x7D84A5C3)); \
+   STEP(n, 5, s6, s5, s4, s3, s2, s1, s0, s7, in[ 0], SPH_C32(0x3B8B5EBE)); \
+   STEP(n, 5, s5, s4, s3, s2, s1, s0, s7, s6, in[12], SPH_C32(0xE06F75D8)); \
+   STEP(n, 5, s4, s3, s2, s1, s0, s7, s6, s5, in[ 7], SPH_C32(0x85C12073)); \
+   STEP(n, 5, s3, s2, s1, s0, s7, s6, s5, s4, in[13], SPH_C32(0x401A449F)); \
+   STEP(n, 5, s2, s1, s0, s7, s6, s5, s4, s3, in[ 8], SPH_C32(0x56C16AA6)); \
+   STEP(n, 5, s1, s0, s7, s6, s5, s4, s3, s2, in[31], SPH_C32(0x4ED3AA62)); \
+   STEP(n, 5, s0, s7, s6, s5, s4, s3, s2, s1, in[10], SPH_C32(0x363F7706)); \
+ \
+   STEP(n, 5, s7, s6, s5, s4, s3, s2, s1, s0, in[ 5], SPH_C32(0x1BFEDF72)); \
+   STEP(n, 5, s6, s5, s4, s3, s2, s1, s0, s7, in[ 9], SPH_C32(0x429B023D)); \
+   STEP(n, 5, s5, s4, s3, s2, s1, s0, s7, s6, in[14], SPH_C32(0x37D0D724)); \
+   STEP(n, 5, s4, s3, s2, s1, s0, s7, s6, s5, in[30], SPH_C32(0xD00A1248)); \
+   STEP(n, 5, s3, s2, s1, s0, s7, s6, s5, s4, in[18], SPH_C32(0xDB0FEAD3)); \
+   STEP(n, 5, s2, s1, s0, s7, s6, s5, s4, s3, in[ 6], SPH_C32(0x49F1C09B)); \
+   STEP(n, 5, s1, s0, s7, s6, s5, s4, s3, s2, in[28], SPH_C32(0x075372C9)); \
+   STEP(n, 5, s0, s7, s6, s5, s4, s3, s2, s1, in[24], SPH_C32(0x80991B7B)); \
+ \
+   STEP(n, 5, s7, s6, s5, s4, s3, s2, s1, s0, in[ 2], SPH_C32(0x25D479D8)); \
+   STEP(n, 5, s6, s5, s4, s3, s2, s1, s0, s7, in[23], SPH_C32(0xF6E8DEF7)); \
+   STEP(n, 5, s5, s4, s3, s2, s1, s0, s7, s6, in[16], SPH_C32(0xE3FE501A)); \
+   STEP(n, 5, s4, s3, s2, s1, s0, s7, s6, s5, in[22], SPH_C32(0xB6794C3B)); \
+   STEP(n, 5, s3, s2, s1, s0, s7, s6, s5, s4, in[ 4], SPH_C32(0x976CE0BD)); \
+   STEP(n, 5, s2, s1, s0, s7, s6, s5, s4, s3, in[ 1], SPH_C32(0x04C006BA)); \
+   STEP(n, 5, s1, s0, s7, s6, s5, s4, s3, s2, in[25], SPH_C32(0xC1A94FB6)); \
+   STEP(n, 5, s0, s7, s6, s5, s4, s3, s2, s1, in[15], SPH_C32(0x409F60C4)); \
+	} 
+
+/*
+ * F1: bitwise boolean combination of its seven arguments.  It is reached
+ * through FP5_1, i.e. by STEP(5, 1, ...).  Every argument is parenthesised,
+ * so arbitrary expressions may be passed.
+ */
+#define F1(x6, x5, x4, x3, x2, x1, x0) \
+	(((x1) & ((x0) ^ (x4))) ^ ((x2) & (x5)) ^ ((x3) & (x6)) ^ (x0))
+
+
+/*
+ * F2: bitwise boolean combination of its seven arguments, reached through
+ * FP5_2, i.e. by STEP(5, 2, ...).
+ *
+ * Every argument is wrapped in parentheses, so an argument that is itself
+ * an expression (for example `a | b`) is combined as a single operand.
+ */
+#define F2(x6, x5, x4, x3, x2, x1, x0) \
+	(((x2) & (((x1) & ~(x3)) ^ ((x4) & (x5)) ^ (x6) ^ (x0))) \
+	^ ((x4) & ((x1) ^ (x5))) ^ (((x3) & (x5)) ^ (x0)))
+
+
+/*
+ * F3: bitwise boolean combination of its seven arguments, reached through
+ * FP5_3, i.e. by STEP(5, 3, ...).
+ */
+#define F3(x6, x5, x4, x3, x2, x1, x0) \
+	(((x3) & (((x1) & (x2)) ^ (x6) ^ (x0))) \
+	^ ((x1) & (x4)) ^ ((x2) & (x5)) ^ (x0))
+
+
+/*
+ * F4: bitwise boolean combination of its seven arguments, reached through
+ * FP5_4, i.e. by STEP(5, 4, ...).  It is the only one of F1..F5 that uses
+ * a bitwise OR.
+ */
+#define F4(x6, x5, x4, x3, x2, x1, x0) \
+	(((x3) & (((x1) & (x2)) ^ ((x4) | (x6)) ^ (x5))) \
+	^ ((x4) & ((~(x2) & (x5)) ^ (x1) ^ (x6) ^ (x0))) \
+	^ ((x2) & (x6)) ^ (x0))
+
+/*
+ * F5: bitwise boolean combination of its seven arguments, reached through
+ * FP5_5, i.e. by STEP(5, 5, ...).
+ */
+#define F5(x6, x5, x4, x3, x2, x1, x0) \
+	(((x0) & ~(((x1) & (x2) & (x3)) ^ (x5))) \
+	^ ((x1) & (x4)) ^ ((x2) & (x5)) ^ ((x3) & (x6)))
+
+/*
+ * FP5_<p>: argument permutations placed in front of F<p>.  STEP builds the
+ * name FP<n>_<p> by token pasting, so these five macros are what
+ * STEP(5, p, ...) actually calls for p = 1..5.  Each one only reorders its
+ * seven arguments before handing them to the matching F<p>; the order is a
+ * fixed table and must not be edited casually.
+ */
+#define FP5_1(x6, x5, x4, x3, x2, x1, x0) \
+	F1(x3, x4, x1, x0, x5, x2, x6)
+#define FP5_2(x6, x5, x4, x3, x2, x1, x0) \
+	F2(x6, x2, x1, x0, x3, x4, x5)
+#define FP5_3(x6, x5, x4, x3, x2, x1, x0) \
+	F3(x2, x6, x0, x4, x3, x1, x5)
+#define FP5_4(x6, x5, x4, x3, x2, x1, x0) \
+	F4(x1, x5, x3, x2, x0, x4, x6)
+#define FP5_5(x6, x5, x4, x3, x2, x1, x0) \
+	F5(x2, x5, x0, x6, x4, x3, x1)
+
+
+/*
+ * STEP(n, p, x7, ..., x0, w, c): one state update.
+ *
+ *   t  = FP<n>_<p>(x6, x5, x4, x3, x2, x1, x0)    (name built by ## pasting)
+ *   x7 = SPH_T32(SPH_ROTR32(t, 7) + SPH_ROTR32(x7, 11) + w + c)
+ *
+ * Only x7 is assigned; x6..x0 are read.  w is the message word and c the
+ * per-step constant supplied by the PASSn tables.
+ *
+ * The body is a braced block, so the temporary `t` does not leak into the
+ * caller's scope.  An argument expression that itself mentions a variable
+ * named `t` would be captured by that temporary.
+ */
+#define STEP(n, p, x7, x6, x5, x4, x3, x2, x1, x0, w, c)   { \
+		uint32_t t = FP ## n ## _ ## p(x6, x5, x4, x3, x2, x1, x0); \
+		(x7) = SPH_T32(SPH_ROTR32(t, 7) + SPH_ROTR32((x7), 11) \
+			+ (w) + (c)); \
+	} 
+
+/*
+ * Per-nonce HAVAL kernel for the M7 header ("_120" variant).
+ *
+ * Each thread whose global index is below `threads`:
+ *   - starts from the chaining value stored in initVector[0..7];
+ *   - compresses a first 32-word block made of c_PaddedMessage80[0..28],
+ *     its own nonce (startNounce + thread) as word 29,
+ *     c_PaddedMessage80[30] + 0x00010000 as word 30 and 0 as word 31;
+ *   - compresses a second block that is zero except word 29 = 0x40290000
+ *     and word 30 = 0x000003d0 (presumably the HAVAL trailer and the
+ *     message bit length -- TODO confirm against the reference);
+ *   - stores the eight 32-bit state words as four 64-bit words at
+ *     outputHash[i * threads + thread], i = 0..3.
+ *
+ * The PASSn macros read and write s0..s7 by name, so those locals (and the
+ * u0..u7 copies used for the feed-forward) keep their original names.
+ */
+__global__ void m7_haval256_gpu_hash_120(int threads, uint32_t startNounce, uint64_t *outputHash)
+{
+	const int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	if (thread >= threads)
+		return;
+
+	const uint32_t nonce = startNounce + thread;
+
+	// working state, seeded from the device-side initial vector
+	uint32_t s0 = initVector[0];
+	uint32_t s1 = initVector[1];
+	uint32_t s2 = initVector[2];
+	uint32_t s3 = initVector[3];
+	uint32_t s4 = initVector[4];
+	uint32_t s5 = initVector[5];
+	uint32_t s6 = initVector[6];
+	uint32_t s7 = initVector[7];
+
+	// saved copy of the state, added back after each block
+	uint32_t u0 = s0, u1 = s1, u2 = s2, u3 = s3;
+	uint32_t u4 = s4, u5 = s5, u6 = s6, u7 = s7;
+
+	uint32_t buf[32];
+
+	// ---- first block: header words + nonce ----
+	#pragma unroll 29
+	for (int w = 0; w < 29; w++)
+		buf[w] = c_PaddedMessage80[w];
+	buf[29] = nonce;
+	buf[30] = c_PaddedMessage80[30] + 0x00010000;  // original author's note: "need to fix that"
+	buf[31] = 0;
+
+	PASS1(5, buf);
+	PASS2(5, buf);
+	PASS3(5, buf);
+	PASS4(5, buf);
+	PASS5(5, buf);
+
+	s0 = sph_t32(s0 + u0);
+	s1 = sph_t32(s1 + u1);
+	s2 = sph_t32(s2 + u2);
+	s3 = sph_t32(s3 + u3);
+	s4 = sph_t32(s4 + u4);
+	s5 = sph_t32(s5 + u5);
+	s6 = sph_t32(s6 + u6);
+	s7 = sph_t32(s7 + u7);
+
+	u0 = s0; u1 = s1; u2 = s2; u3 = s3;
+	u4 = s4; u5 = s5; u6 = s6; u7 = s7;
+
+	// ---- second block: all zero except the two trailer words ----
+	#pragma unroll 32
+	for (int w = 0; w < 32; w++)
+		buf[w] = 0;
+	buf[29] = 0x40290000;
+	buf[30] = 0x000003d0;
+
+	PASS1(5, buf);
+	PASS2(5, buf);
+	PASS3(5, buf);
+	PASS4(5, buf);
+	PASS5(5, buf);
+
+	s0 = sph_t32(s0 + u0);
+	s1 = sph_t32(s1 + u1);
+	s2 = sph_t32(s2 + u2);
+	s3 = sph_t32(s3 + u3);
+	s4 = sph_t32(s4 + u4);
+	s5 = sph_t32(s5 + u5);
+	s6 = sph_t32(s6 + u6);
+	s7 = sph_t32(s7 + u7);
+
+	// view the eight 32-bit state words as four 64-bit words for the store
+	union {
+		uint32_t w32[8];
+		uint64_t w64[4];
+	} digest;
+
+	digest.w32[0] = s0;
+	digest.w32[1] = s1;
+	digest.w32[2] = s2;
+	digest.w32[3] = s3;
+	digest.w32[4] = s4;
+	digest.w32[5] = s5;
+	digest.w32[6] = s6;
+	digest.w32[7] = s7;
+
+	#pragma unroll 4
+	for (int q = 0; q < 4; q++)
+		outputHash[q * threads + thread] = digest.w64[q];
+}
+
+/*
+ * HAVAL kernel over a 64-byte (16-word) input held in g_hash.
+ *
+ * Each thread whose global index is below `threads` picks its nonce from
+ * g_nonceVector[thread] when that pointer is non-NULL, otherwise
+ * startNounce + thread, and works on the 16 32-bit words starting at
+ * g_hash[8 * (nonce - startNounce)].
+ *
+ * A single 32-word block is compressed: words 0..15 are the input, word 16
+ * is 0x00000001, word 29 is 0x40290000, word 30 is 0x00000200 and all other
+ * words are zero (presumably padding bit, HAVAL trailer and bit length --
+ * TODO confirm).  The eight resulting state words overwrite input words
+ * 0..7; words 8..15 are written back unchanged.
+ *
+ * The PASSn macros read and write s0..s7 by name, so those locals keep
+ * their original names.
+ */
+__global__ void haval256_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector)
+{
+	const int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	if (thread >= threads)
+		return;
+
+	uint32_t nounce;
+	if (g_nonceVector != NULL)
+		nounce = g_nonceVector[thread];
+	else
+		nounce = startNounce + thread;
+
+	const int hashPosition = nounce - startNounce;
+	uint32_t *inpHash = (uint32_t*)&g_hash[8 * hashPosition];
+
+	// private copy of the 16 input words
+	uint32_t block[16];
+	#pragma unroll 16
+	for (int w = 0; w < 16; w++)
+		block[w] = inpHash[w];
+
+	// working state, seeded from the device-side initial vector
+	uint32_t s0 = initVector[0];
+	uint32_t s1 = initVector[1];
+	uint32_t s2 = initVector[2];
+	uint32_t s3 = initVector[3];
+	uint32_t s4 = initVector[4];
+	uint32_t s5 = initVector[5];
+	uint32_t s6 = initVector[6];
+	uint32_t s7 = initVector[7];
+
+	// saved copy of the state for the feed-forward
+	uint32_t u0 = s0, u1 = s1, u2 = s2, u3 = s3;
+	uint32_t u4 = s4, u5 = s5, u6 = s6, u7 = s7;
+
+	// message block: input words followed by zeros and the fixed trailer
+	uint32_t buf[32];
+	#pragma unroll 16
+	for (int w = 0; w < 16; w++)
+		buf[w] = block[w];
+	#pragma unroll 16
+	for (int w = 16; w < 32; w++)
+		buf[w] = 0;
+	buf[16] = 0x00000001;
+	buf[29] = 0x40290000;
+	buf[30] = 0x00000200;
+
+	PASS1(5, buf);
+	PASS2(5, buf);
+	PASS3(5, buf);
+	PASS4(5, buf);
+	PASS5(5, buf);
+
+	s0 = sph_t32(s0 + u0);
+	s1 = sph_t32(s1 + u1);
+	s2 = sph_t32(s2 + u2);
+	s3 = sph_t32(s3 + u3);
+	s4 = sph_t32(s4 + u4);
+	s5 = sph_t32(s5 + u5);
+	s6 = sph_t32(s6 + u6);
+	s7 = sph_t32(s7 + u7);
+
+	// digest replaces the first eight words; the rest keep their input value
+	block[0] = s0;
+	block[1] = s1;
+	block[2] = s2;
+	block[3] = s3;
+	block[4] = s4;
+	block[5] = s5;
+	block[6] = s6;
+	block[7] = s7;
+
+	#pragma unroll 16
+	for (int w = 0; w < 16; w++)
+		inpHash[w] = block[w];
+}
+
+
+/*
+ * Uploads the host table c_initVector into the device symbol initVector.
+ *
+ * thr_id and threads are part of the common init signature but are not
+ * used by this function.  The return code of the copy is not inspected.
+ */
+void haval256_cpu_init(int thr_id, int threads)
+{
+	cudaMemcpyToSymbol(initVector, c_initVector, sizeof(c_initVector),
+	                   0, cudaMemcpyHostToDevice);
+}
+
+/*
+ * Host launcher for haval256_gpu_hash_64.
+ *
+ * Launches ceil(threads / 256) blocks of 256 threads with no dynamic shared
+ * memory, passing d_hash reinterpreted as uint64_t*, then calls
+ * MyStreamSynchronize(NULL, order, thr_id).
+ */
+__host__ void haval256_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order)
+{
+	// Original note: alignment with the mixtab size, DO NOT CHANGE.
+	const int tpb = 256;
+
+	// number of thread blocks needed, rounded up
+	const dim3 launchGrid((threads + tpb - 1) / tpb);
+	const dim3 launchBlock(tpb);
+
+	uint64_t *hash64 = (uint64_t*)d_hash;
+	haval256_gpu_hash_64<<<launchGrid, launchBlock, 0>>>(threads, startNounce, hash64, d_nonceVector);
+
+	MyStreamSynchronize(NULL, order, thr_id);
+}
+
+/*
+ * Stages the block header for the HAVAL "_120" kernel.
+ *
+ * Copies the first 122 bytes of pdata into a 128-byte buffer whose last six
+ * bytes are zero, and uploads all 128 bytes to the device symbol
+ * c_PaddedMessage80.  pdata must therefore reference at least 122 readable
+ * bytes.
+ */
+__host__ void haval256_setBlock_120(void *pdata)
+{
+	unsigned char padded[128];
+
+	memset(padded, 0, sizeof(padded));
+	memcpy(padded, pdata, 122);
+
+	cudaMemcpyToSymbol(c_PaddedMessage80, padded, sizeof(padded), 0, cudaMemcpyHostToDevice);
+}
+
+/*
+ * Host launcher for m7_haval256_gpu_hash_120.
+ *
+ * thr_id       - miner thread index, forwarded to MyStreamSynchronize
+ * threads      - number of nonces to process in this launch
+ * startNounce  - first nonce; thread i hashes startNounce + i
+ * d_outputHash - device buffer the kernel writes to at
+ *                index i * threads + thread for i = 0..3
+ * order        - forwarded to MyStreamSynchronize
+ *
+ * The block count is rounded up so that a `threads` value that is not a
+ * multiple of 256 (or is smaller than 256) is still fully covered; the
+ * kernel's own `thread < threads` check masks the surplus threads of the
+ * last block.
+ */
+__host__ void m7_haval256_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order)
+{
+	// Original note: alignment with the mixtab size, DO NOT CHANGE.
+	const int threadsperblock = 256;
+
+	// number of thread blocks needed, rounded up
+	dim3 grid((threads + threadsperblock - 1) / threadsperblock);
+	dim3 block(threadsperblock);
+	size_t shared_size = 0;
+
+	m7_haval256_gpu_hash_120<<<grid, block, shared_size>>>(threads, startNounce, d_outputHash);
+
+	MyStreamSynchronize(NULL, order, thr_id);
+}
+
diff --git a/x13/cuda_m7_sha256.cu b/x13/cuda_m7_sha256.cu
new file mode 100644
index 0000000000..fcc92fc74d
--- /dev/null
+++ b/x13/cuda_m7_sha256.cu
@@ -0,0 +1,526 @@
+#include <cuda.h>
+#include "cuda_runtime.h"
+#include "device_launch_parameters.h"
+
+#include <stdio.h> 
+#include <stdint.h>
+#include <memory.h>
+
+extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
+
+// SPH_C64 / SPH_C32: turn a bare integer literal into a typed 64-bit /
+// 32-bit unsigned constant by pasting the ULL / U suffix onto it.  The
+// argument must be a literal token, not an expression.
+#define SPH_C64(x)    ((uint64_t)(x ## ULL))
+#define SPH_C32(x)    ((uint32_t)(x ## U))
+// SPH_T32: masks a value down to its low 32 bits.
+#define SPH_T32(x)    ((x) & SPH_C32(0xFFFFFFFF))
+// ROTR: local alias for SPH_ROTR32, which is not defined in this file
+// (presumably provided by cuda_helper.h -- TODO confirm).
+#define ROTR    SPH_ROTR32
+#include "cuda_helper.h"
+// host_swab32: reverses the byte order of a 32-bit value on the host.
+// The argument is parenthesised at every use so that an expression such as
+// `a | b` is swapped as a whole; note that it is still evaluated four times.
+#define host_swab32(x)        ( (((x) & 0x000000FF) << 24) | (((x) & 0x0000FF00) << 8) | (((x) & 0x00FF0000) >> 8) | (((x) & 0xFF000000) >> 24) )
+
+// 128-byte padded block header.  m7_sha256_setBlock_120 fills it with 122
+// header bytes, a 0x80 marker and five zero bytes (the "80" in the name does
+// not match that length).
+ __constant__ uint32_t c_PaddedMessage80[32]; // padded message (80 bytes + padding)
+// Target, uploaded as four 64-bit words by m7_sha256_setBlock_120; the
+// _300 kernel compares against pTarget[3].
+__constant__ uint64_t pTarget[4];
+// SHA-256 state after the first 64-byte block of the header, precomputed on
+// the host in m7_sha256_setBlock_120.
+__constant__ uint32_t pbuf[8];
+// Per-thr_id result buffers (both allocated in m7_sha256_cpu_init):
+//   d_mnounce[] - pinned HOST word (cudaMallocHost) despite the d_ prefix
+//   d_MNonce[]  - DEVICE word (cudaMalloc) written by the _300 kernel
+uint32_t *d_mnounce[8];
+uint32_t *d_MNonce[8];
+
+
+// Device copies of cpu_H256 and cpu_K, uploaded in m7_sha256_cpu_init.
+static __constant__ uint32_t H256[8];
+static __constant__ uint32_t K[64];
+// Original note: "must be expanded".  The two symbols below are not
+// referenced by the code visible in this section.
+__constant__ uint32_t sha256_gpu_blockHeader[16]; // 2x512 Bit Message
+__constant__ uint32_t sha256_gpu_register[8];
+
+
+// Host-side copy of the eight 32-bit initial state words (the standard
+// SHA-256 initial hash value).  Uploaded to the device symbol H256 by
+// m7_sha256_cpu_init and used as the starting state in
+// m7_sha256_setBlock_120.
+static const uint32_t cpu_H256[8] = {
+	SPH_C32(0x6A09E667), SPH_C32(0xBB67AE85), SPH_C32(0x3C6EF372),
+	SPH_C32(0xA54FF53A), SPH_C32(0x510E527F), SPH_C32(0x9B05688C),
+	SPH_C32(0x1F83D9AB), SPH_C32(0x5BE0CD19)
+};
+// Host-side copy of the 64 per-round constants (the standard SHA-256 K
+// table).  Uploaded to the device symbol K by m7_sha256_cpu_init and passed
+// directly to sha2_round_body_host; entry i is consumed by round i.
+static const uint32_t cpu_K[64] = {
+	SPH_C32(0x428A2F98), SPH_C32(0x71374491),
+	SPH_C32(0xB5C0FBCF), SPH_C32(0xE9B5DBA5),
+	SPH_C32(0x3956C25B), SPH_C32(0x59F111F1),
+	SPH_C32(0x923F82A4), SPH_C32(0xAB1C5ED5),
+	SPH_C32(0xD807AA98), SPH_C32(0x12835B01),
+	SPH_C32(0x243185BE), SPH_C32(0x550C7DC3),
+	SPH_C32(0x72BE5D74), SPH_C32(0x80DEB1FE),
+	SPH_C32(0x9BDC06A7), SPH_C32(0xC19BF174),
+	SPH_C32(0xE49B69C1), SPH_C32(0xEFBE4786),
+	SPH_C32(0x0FC19DC6), SPH_C32(0x240CA1CC),
+	SPH_C32(0x2DE92C6F), SPH_C32(0x4A7484AA),
+	SPH_C32(0x5CB0A9DC), SPH_C32(0x76F988DA),
+	SPH_C32(0x983E5152), SPH_C32(0xA831C66D),
+	SPH_C32(0xB00327C8), SPH_C32(0xBF597FC7),
+	SPH_C32(0xC6E00BF3), SPH_C32(0xD5A79147),
+	SPH_C32(0x06CA6351), SPH_C32(0x14292967),
+	SPH_C32(0x27B70A85), SPH_C32(0x2E1B2138),
+	SPH_C32(0x4D2C6DFC), SPH_C32(0x53380D13),
+	SPH_C32(0x650A7354), SPH_C32(0x766A0ABB),
+	SPH_C32(0x81C2C92E), SPH_C32(0x92722C85),
+	SPH_C32(0xA2BFE8A1), SPH_C32(0xA81A664B),
+	SPH_C32(0xC24B8B70), SPH_C32(0xC76C51A3),
+	SPH_C32(0xD192E819), SPH_C32(0xD6990624),
+	SPH_C32(0xF40E3585), SPH_C32(0x106AA070),
+	SPH_C32(0x19A4C116), SPH_C32(0x1E376C08),
+	SPH_C32(0x2748774C), SPH_C32(0x34B0BCB5),
+	SPH_C32(0x391C0CB3), SPH_C32(0x4ED8AA4A),
+	SPH_C32(0x5B9CCA4F), SPH_C32(0x682E6FF3),
+	SPH_C32(0x748F82EE), SPH_C32(0x78A5636F),
+	SPH_C32(0x84C87814), SPH_C32(0x8CC70208),
+	SPH_C32(0x90BEFFFA), SPH_C32(0xA4506CEB),
+	SPH_C32(0xBEF9A3F7), SPH_C32(0xC67178F2)
+};
+
+
+// Device helper: combines x rotated right by 2, 13 and 22 bits through
+// xor3b.  The host path (sha2_step1_host) computes the same quantity as
+// ROTR(a,2) ^ ROTR(a,13) ^ ROTR(a,22).
+static __device__ __forceinline__ uint32_t bsg2_0(uint32_t x)
+{
+	const uint32_t rot2  = SPH_ROTR32(x, 2);
+	const uint32_t rot13 = SPH_ROTR32(x, 13);
+	const uint32_t rot22 = SPH_ROTR32(x, 22);
+
+	return xor3b(rot2, rot13, rot22);
+}
+// Device helper: combines x rotated right by 6, 11 and 25 bits through
+// xor3b.  The host path (sha2_step1_host) computes the same quantity as
+// ROTR(e,6) ^ ROTR(e,11) ^ ROTR(e,25).
+static __device__ __forceinline__ uint32_t bsg2_1(uint32_t x)
+{
+	const uint32_t rot6  = SPH_ROTR32(x, 6);
+	const uint32_t rot11 = SPH_ROTR32(x, 11);
+	const uint32_t rot25 = SPH_ROTR32(x, 25);
+
+	return xor3b(rot6, rot11, rot25);
+}
+// Device helper for the message schedule: combines x rotated right by 7 and
+// 18 bits with x shifted right by 3 bits (via shr_t32) through xor3b.  The
+// host path (sha2_step2_host) computes the same quantity as
+// ROTR(x,7) ^ ROTR(x,18) ^ SPH_T32(x >> 3).
+//
+// The intermediates are 32-bit values, so they are held in uint32_t like in
+// bsg2_0 / bsg2_1 rather than being widened to 64 bits and narrowed again.
+static __device__ __forceinline__ uint32_t ssg2_0(uint32_t x)
+{
+	uint32_t r1 = SPH_ROTR32(x,7);
+	uint32_t r2 = SPH_ROTR32(x,18);
+	uint32_t r3 = shr_t32(x,3);
+	return xor3b(r1,r2,r3);
+}
+// Device helper for the message schedule: combines x rotated right by 17
+// and 19 bits with x shifted right by 10 bits (via shr_t32) through xor3b.
+// The host path (sha2_step2_host) computes the same quantity as
+// ROTR(x,17) ^ ROTR(x,19) ^ SPH_T32(x >> 10).
+//
+// The intermediates are 32-bit values, so they are held in uint32_t like in
+// bsg2_0 / bsg2_1 rather than being widened to 64 bits and narrowed again.
+static __device__ __forceinline__ uint32_t ssg2_1(uint32_t x)
+{
+	uint32_t r1 = SPH_ROTR32(x,17);
+	uint32_t r2 = SPH_ROTR32(x,19);
+	uint32_t r3 = shr_t32(x,10);
+	return xor3b(r1,r2,r3);
+}
+
+// One compression round (device) for rounds whose message word is supplied
+// directly.
+//   a..c, e..g - read-only working variables
+//   d, h       - updated in place: d += t1, h = t1 + t2
+//   in         - message word for this round
+//   Kshared    - round constant for this round
+static __device__ __forceinline__ void sha2_step1(uint32_t a,uint32_t b,uint32_t c,uint32_t &d,uint32_t e,uint32_t f,uint32_t g,uint32_t &h,
+	                                              uint32_t in,const uint32_t Kshared)
+{
+	const uint32_t choose   = xandx(e, f, g);
+	const uint32_t sigma_e  = bsg2_1(e);
+	const uint32_t sigma_a  = bsg2_0(a);
+	const uint32_t majority = andor32(a, b, c);
+
+	const uint32_t t1 = h + sigma_e + choose + Kshared + in;
+	const uint32_t t2 = sigma_a + majority;
+
+	d += t1;
+	h = t1 + t2;
+}
+
+// Host twin of sha2_step1: the device helpers are spelled out as plain
+// expressions so the function can run on the CPU.
+//   a..c, e..g - read-only working variables
+//   d, h       - updated in place: d += t1, h = t1 + t2
+//   in         - message word for this round
+//   Kshared    - round constant for this round
+static __forceinline__ void sha2_step1_host(uint32_t a,uint32_t b,uint32_t c,uint32_t &d,uint32_t e,uint32_t f,uint32_t g,uint32_t &h,
+	                                              uint32_t in,const uint32_t Kshared)
+{
+	const uint32_t choose   = (((f) ^ (g)) & (e)) ^ (g);               // xandx(e, f, g)
+	const uint32_t sigma_e  = ROTR(e, 6) ^ ROTR(e, 11) ^ ROTR(e, 25);  // bsg2_1(e)
+	const uint32_t sigma_a  = ROTR(a, 2) ^ ROTR(a, 13) ^ ROTR(a, 22);  // bsg2_0(a)
+	const uint32_t majority = ((b) & (c)) | (((b) | (c)) & (a));       // andor32(a,b,c)
+
+	const uint32_t t1 = h + sigma_e + choose + Kshared + in;
+	const uint32_t t2 = sigma_a + majority;
+
+	d += t1;
+	h = t1 + t2;
+}
+
+// One compression round (device) for rounds that first extend the message
+// schedule.  `in` is used as a 16-entry circular window: slot pc is replaced
+// by
+//   ssg2_1(in[(pc-2)&15]) + in[(pc-7)&15] + ssg2_0(in[(pc-15)&15]) + in[pc]
+// and the new value is then consumed as this round's message word.
+//   a..c, e..g - read-only working variables
+//   d, h       - updated in place: d += t1, h = t1 + t2
+//   pc         - slot index; callers in this file pass 0..15
+//   Kshared    - round constant for this round
+static __device__ __forceinline__ void sha2_step2(uint32_t a,uint32_t b,uint32_t c,uint32_t &d,uint32_t e,uint32_t f,uint32_t g,uint32_t &h,
+	                                              uint32_t* in,uint32_t pc,const uint32_t Kshared)
+{
+	// older schedule words, addressed modulo 16
+	const uint32_t w_m2  = in[(pc - 2)  & 0xF];
+	const uint32_t w_m7  = in[(pc - 7)  & 0xF];
+	const uint32_t w_m15 = in[(pc - 15) & 0xF];
+	const uint32_t w_cur = in[pc];
+
+	const uint32_t small1   = ssg2_1(w_m2);
+	const uint32_t small0   = ssg2_0(w_m15);
+	const uint32_t choose   = xandx(e, f, g);
+	const uint32_t sigma_e  = bsg2_1(e);
+	const uint32_t sigma_a  = bsg2_0(a);
+	const uint32_t majority = andor32(a, b, c);
+
+	const uint32_t w_new = small1 + w_m7 + small0 + w_cur;
+	in[pc] = w_new;
+
+	const uint32_t t1 = h + sigma_e + choose + Kshared + w_new;
+	const uint32_t t2 = sigma_a + majority;
+
+	d += t1;
+	h = t1 + t2;
+}
+
+// Host twin of sha2_step2: extends the 16-entry circular message window at
+// slot pc and then performs one compression round with the new word.  The
+// device helpers are spelled out as plain expressions.
+//   a..c, e..g - read-only working variables
+//   d, h       - updated in place: d += t1, h = t1 + t2
+//   pc         - slot index; callers in this file pass 0..15
+//   Kshared    - round constant for this round
+static __forceinline__ void sha2_step2_host(uint32_t a,uint32_t b,uint32_t c,uint32_t &d,uint32_t e,uint32_t f,uint32_t g,uint32_t &h,
+	                                              uint32_t* in,uint32_t pc,const uint32_t Kshared)
+{
+	// older schedule words, addressed modulo 16
+	const uint32_t w_m2  = in[(pc - 2)  & 0xF];
+	const uint32_t w_m7  = in[(pc - 7)  & 0xF];
+	const uint32_t w_m15 = in[(pc - 15) & 0xF];
+	const uint32_t w_cur = in[pc];
+
+	const uint32_t small1   = ROTR(w_m2, 17) ^ ROTR(w_m2, 19) ^ SPH_T32((w_m2) >> 10);  // ssg2_1
+	const uint32_t small0   = ROTR(w_m15, 7) ^ ROTR(w_m15, 18) ^ SPH_T32((w_m15) >> 3); // ssg2_0
+	const uint32_t choose   = (((f) ^ (g)) & (e)) ^ (g);                                // xandx(e, f, g)
+	const uint32_t sigma_e  = ROTR(e, 6) ^ ROTR(e, 11) ^ ROTR(e, 25);                   // bsg2_1(e)
+	const uint32_t sigma_a  = ROTR(a, 2) ^ ROTR(a, 13) ^ ROTR(a, 22);                   // bsg2_0(a)
+	const uint32_t majority = ((b) & (c)) | (((b) | (c)) & (a));                        // andor32(a,b,c)
+
+	const uint32_t w_new = small1 + w_m7 + small0 + w_cur;
+	in[pc] = w_new;
+
+	const uint32_t t1 = h + sigma_e + choose + Kshared + w_new;
+	const uint32_t t2 = sigma_a + majority;
+
+	d += t1;
+	h = t1 + t2;
+}
+
+
+// Device compression of one 16-word block: 64 rounds in total.
+//   in      - the 16 message words; overwritten, because rounds 16..63
+//             reuse the array as a circular schedule window (sha2_step2)
+//   r       - 8-word chaining state; the result is added into it
+//   Kshared - table of 64 round constants
+// Instead of shuffling the working variables after every round, the calls
+// rotate the argument order, so the pattern repeats every eight rounds.
+static __device__ __forceinline__ void sha2_round_body(uint32_t* in, uint32_t* r,const uint32_t* Kshared)
+{
+	uint32_t a = r[0], b = r[1], c = r[2], d = r[3];
+	uint32_t e = r[4], f = r[5], g = r[6], h = r[7];
+
+	// rounds 0..15: message words are taken as they are
+	sha2_step1(a,b,c,d,e,f,g,h,in[ 0],Kshared[ 0]);
+	sha2_step1(h,a,b,c,d,e,f,g,in[ 1],Kshared[ 1]);
+	sha2_step1(g,h,a,b,c,d,e,f,in[ 2],Kshared[ 2]);
+	sha2_step1(f,g,h,a,b,c,d,e,in[ 3],Kshared[ 3]);
+	sha2_step1(e,f,g,h,a,b,c,d,in[ 4],Kshared[ 4]);
+	sha2_step1(d,e,f,g,h,a,b,c,in[ 5],Kshared[ 5]);
+	sha2_step1(c,d,e,f,g,h,a,b,in[ 6],Kshared[ 6]);
+	sha2_step1(b,c,d,e,f,g,h,a,in[ 7],Kshared[ 7]);
+	sha2_step1(a,b,c,d,e,f,g,h,in[ 8],Kshared[ 8]);
+	sha2_step1(h,a,b,c,d,e,f,g,in[ 9],Kshared[ 9]);
+	sha2_step1(g,h,a,b,c,d,e,f,in[10],Kshared[10]);
+	sha2_step1(f,g,h,a,b,c,d,e,in[11],Kshared[11]);
+	sha2_step1(e,f,g,h,a,b,c,d,in[12],Kshared[12]);
+	sha2_step1(d,e,f,g,h,a,b,c,in[13],Kshared[13]);
+	sha2_step1(c,d,e,f,g,h,a,b,in[14],Kshared[14]);
+	sha2_step1(b,c,d,e,f,g,h,a,in[15],Kshared[15]);
+
+	// rounds 16..63: three groups of 16, each extending the schedule in place
+	#pragma unroll 3
+	for (int grp = 0; grp < 3; grp++) {
+		const uint32_t* Kg = Kshared + 16 * (grp + 1);
+
+		sha2_step2(a,b,c,d,e,f,g,h,in, 0,Kg[ 0]);
+		sha2_step2(h,a,b,c,d,e,f,g,in, 1,Kg[ 1]);
+		sha2_step2(g,h,a,b,c,d,e,f,in, 2,Kg[ 2]);
+		sha2_step2(f,g,h,a,b,c,d,e,in, 3,Kg[ 3]);
+		sha2_step2(e,f,g,h,a,b,c,d,in, 4,Kg[ 4]);
+		sha2_step2(d,e,f,g,h,a,b,c,in, 5,Kg[ 5]);
+		sha2_step2(c,d,e,f,g,h,a,b,in, 6,Kg[ 6]);
+		sha2_step2(b,c,d,e,f,g,h,a,in, 7,Kg[ 7]);
+		sha2_step2(a,b,c,d,e,f,g,h,in, 8,Kg[ 8]);
+		sha2_step2(h,a,b,c,d,e,f,g,in, 9,Kg[ 9]);
+		sha2_step2(g,h,a,b,c,d,e,f,in,10,Kg[10]);
+		sha2_step2(f,g,h,a,b,c,d,e,in,11,Kg[11]);
+		sha2_step2(e,f,g,h,a,b,c,d,in,12,Kg[12]);
+		sha2_step2(d,e,f,g,h,a,b,c,in,13,Kg[13]);
+		sha2_step2(c,d,e,f,g,h,a,b,in,14,Kg[14]);
+		sha2_step2(b,c,d,e,f,g,h,a,in,15,Kg[15]);
+	}
+
+	// feed-forward into the chaining state
+	r[0] += a;
+	r[1] += b;
+	r[2] += c;
+	r[3] += d;
+	r[4] += e;
+	r[5] += f;
+	r[6] += g;
+	r[7] += h;
+}
+
+// Host twin of sha2_round_body: compresses one 16-word block in 64 rounds.
+//   in      - the 16 message words; overwritten, because rounds 16..63
+//             reuse the array as a circular schedule window
+//   r       - 8-word chaining state; the result is added into it
+//   Kshared - table of 64 round constants
+// The calls rotate the argument order instead of shuffling the working
+// variables, so the pattern repeats every eight rounds.
+static __forceinline__ void sha2_round_body_host(uint32_t* in, uint32_t* r,const uint32_t* Kshared)
+{
+	uint32_t a = r[0], b = r[1], c = r[2], d = r[3];
+	uint32_t e = r[4], f = r[5], g = r[6], h = r[7];
+
+	// rounds 0..15: message words are taken as they are
+	sha2_step1_host(a,b,c,d,e,f,g,h,in[ 0],Kshared[ 0]);
+	sha2_step1_host(h,a,b,c,d,e,f,g,in[ 1],Kshared[ 1]);
+	sha2_step1_host(g,h,a,b,c,d,e,f,in[ 2],Kshared[ 2]);
+	sha2_step1_host(f,g,h,a,b,c,d,e,in[ 3],Kshared[ 3]);
+	sha2_step1_host(e,f,g,h,a,b,c,d,in[ 4],Kshared[ 4]);
+	sha2_step1_host(d,e,f,g,h,a,b,c,in[ 5],Kshared[ 5]);
+	sha2_step1_host(c,d,e,f,g,h,a,b,in[ 6],Kshared[ 6]);
+	sha2_step1_host(b,c,d,e,f,g,h,a,in[ 7],Kshared[ 7]);
+	sha2_step1_host(a,b,c,d,e,f,g,h,in[ 8],Kshared[ 8]);
+	sha2_step1_host(h,a,b,c,d,e,f,g,in[ 9],Kshared[ 9]);
+	sha2_step1_host(g,h,a,b,c,d,e,f,in[10],Kshared[10]);
+	sha2_step1_host(f,g,h,a,b,c,d,e,in[11],Kshared[11]);
+	sha2_step1_host(e,f,g,h,a,b,c,d,in[12],Kshared[12]);
+	sha2_step1_host(d,e,f,g,h,a,b,c,in[13],Kshared[13]);
+	sha2_step1_host(c,d,e,f,g,h,a,b,in[14],Kshared[14]);
+	sha2_step1_host(b,c,d,e,f,g,h,a,in[15],Kshared[15]);
+
+	// rounds 16..63: three groups of 16, each extending the schedule in place
+	for (int grp = 0; grp < 3; grp++) {
+		const uint32_t* Kg = Kshared + 16 * (grp + 1);
+
+		sha2_step2_host(a,b,c,d,e,f,g,h,in, 0,Kg[ 0]);
+		sha2_step2_host(h,a,b,c,d,e,f,g,in, 1,Kg[ 1]);
+		sha2_step2_host(g,h,a,b,c,d,e,f,in, 2,Kg[ 2]);
+		sha2_step2_host(f,g,h,a,b,c,d,e,in, 3,Kg[ 3]);
+		sha2_step2_host(e,f,g,h,a,b,c,d,in, 4,Kg[ 4]);
+		sha2_step2_host(d,e,f,g,h,a,b,c,in, 5,Kg[ 5]);
+		sha2_step2_host(c,d,e,f,g,h,a,b,in, 6,Kg[ 6]);
+		sha2_step2_host(b,c,d,e,f,g,h,a,in, 7,Kg[ 7]);
+		sha2_step2_host(a,b,c,d,e,f,g,h,in, 8,Kg[ 8]);
+		sha2_step2_host(h,a,b,c,d,e,f,g,in, 9,Kg[ 9]);
+		sha2_step2_host(g,h,a,b,c,d,e,f,in,10,Kg[10]);
+		sha2_step2_host(f,g,h,a,b,c,d,e,in,11,Kg[11]);
+		sha2_step2_host(e,f,g,h,a,b,c,d,in,12,Kg[12]);
+		sha2_step2_host(d,e,f,g,h,a,b,c,in,13,Kg[13]);
+		sha2_step2_host(c,d,e,f,g,h,a,b,in,14,Kg[14]);
+		sha2_step2_host(b,c,d,e,f,g,h,a,in,15,Kg[15]);
+	}
+
+	// feed-forward into the chaining state
+	r[0] += a;
+	r[1] += b;
+	r[2] += c;
+	r[3] += d;
+	r[4] += e;
+	r[5] += f;
+	r[6] += g;
+	r[7] += h;
+}
+
+
+/*
+ * Per-nonce SHA-256 kernel for the M7 header ("_120" variant).
+ *
+ * The state after the first 64-byte block is taken from pbuf (precomputed
+ * on the host by m7_sha256_setBlock_120).  Each thread whose global index is
+ * below `threads` then compresses two more blocks:
+ *   - block 2: byte-swapped c_PaddedMessage80[16..28], the byte-swapped
+ *     nonce (startNounce + thread) as word 13, byte-swapped
+ *     c_PaddedMessage80[30] as word 14, and 0 as word 15;
+ *   - block 3: all zero except word 15 = 0x3d0 (976, presumably the message
+ *     length in bits -- TODO confirm).
+ * The eight state words are stored as four 64-bit words, each passed
+ * through cuda_swab32ll, at outputHash[i * threads + thread], i = 0..3.
+ *
+ * Compiled with __launch_bounds__(512,1); the host launcher uses 512
+ * threads per block.
+ */
+__global__ void __launch_bounds__(512,1) m7_sha256_gpu_hash_120(int threads, uint32_t startNounce, uint64_t *outputHash)
+{
+	const int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	if (thread >= threads)
+		return;
+
+	const uint32_t nonce = startNounce + thread;
+
+	uint32_t state[8];
+	uint32_t blockB[16];
+	uint32_t blockC[16];
+
+	// second message block: header tail, nonce, trailing header word
+	#pragma unroll 13
+	for (int w = 0; w < 13; w++)
+		blockB[w] = cuda_swab32(c_PaddedMessage80[w + 16]);
+	blockB[13] = cuda_swab32(nonce);
+	blockB[14] = cuda_swab32(c_PaddedMessage80[30]);
+	blockB[15] = 0;
+
+	// third message block: zeros followed by the length word
+	#pragma unroll 15
+	for (int w = 0; w < 15; w++)
+		blockC[w] = 0;
+	blockC[15] = 0x3d0;
+
+	// resume from the host-computed midstate
+	#pragma unroll 8
+	for (int w = 0; w < 8; w++)
+		state[w] = pbuf[w];
+
+	sha2_round_body(blockB, state, K);
+	sha2_round_body(blockC, state, K);
+
+	const uint64_t *state64 = (const uint64_t*)state;
+	#pragma unroll 4
+	for (int q = 0; q < 4; q++)
+		outputHash[q * threads + thread] = cuda_swab32ll(state64[q]);
+}
+
+
+/*
+ * Final SHA-256 of the M7 chain plus target test.
+ *
+ * threads       - number of nonces in this launch
+ * startNounce   - nonce of thread 0; thread i tests startNounce + i
+ * g_hash1       - input, read as 38 64-bit words per thread at
+ *                 g_hash1[threads * k + thread], k = 0..37
+ * g_nonceVector - not used by this kernel
+ * resNounce     - resNounce[0] receives the smallest nonce whose hash
+ *                 passes the test; the host presets it to 0xffffffff
+ *
+ * Words 0..31 form four full 64-byte blocks.  The tail block is built from
+ * words 32..37: trailing zero bytes are stripped (scanning down from byte
+ * 43 of the tail), a 0x80 marker is placed after the last non-zero byte,
+ * and in[15] ends up as the bit length (0x968 minus 8 per scan step).
+ * NOTE(review): the scan has no lower bound; a tail consisting only of
+ * zero bytes would index below in[0].
+ *
+ * Only the most significant 64-bit word of the result is compared with
+ * pTarget[3].  The original author left the full 256-bit comparison
+ * (words 2..0 on ties) disabled, noting it is only needed for solo mining
+ * and that omitting it may slightly increase rejected blocks.
+ *
+ * The result is published with atomicMin, so concurrent winners cannot
+ * overwrite a smaller nonce with a larger one.
+ */
+__global__ void  m7_sha256_gpu_hash_300(int threads, uint32_t startNounce, uint64_t *g_hash1, uint64_t *g_nonceVector, uint32_t *resNounce)
+{
+	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	if (thread < threads)
+	{
+		uint32_t in[16],buf[8];
+
+		// block 1 (words 0..7), starting from the SHA-256 initial state
+		#pragma unroll 8
+		for (int i=0;i<8;i++) {((uint64_t*)in)[i]= cuda_swab32ll(g_hash1[threads*i+thread]);}
+		#pragma unroll 8
+		for (int i=0;i<8;i++) {buf[i] = H256[i];}
+		sha2_round_body(in,buf,K);
+
+		// block 2 (words 8..15)
+		#pragma unroll 8
+		for (int i=0;i<8;i++) {((uint64_t*)in)[i]= cuda_swab32ll(g_hash1[threads*(i+8)+thread]);}
+		sha2_round_body(in,buf,K);
+
+		// block 3 (words 16..23)
+		#pragma unroll 8
+		for (int i=0;i<8;i++) {((uint64_t*)in)[i]= cuda_swab32ll(g_hash1[threads*(i+16)+thread]);}
+		sha2_round_body(in,buf,K);
+
+		// block 4 (words 24..31)
+		#pragma unroll 8
+		for (int i=0;i<8;i++) {((uint64_t*)in)[i]= cuda_swab32ll(g_hash1[threads*(i+24)+thread]);}
+		sha2_round_body(in,buf,K);
+
+		// tail block: words 32..36 byte-swapped, word 37 kept raw for the scan
+		#pragma unroll 5
+		for (int i=0;i<5;i++) {((uint64_t*)in)[i]= cuda_swab32ll(g_hash1[threads*(i+32)+thread]);}
+		((uint64_t*)in)[5]= g_hash1[threads*(5+32)+thread];
+		in[11]=0;
+		in[12]=0;
+		in[13]=0;
+		in[14]=0;
+
+		// length word: start one byte past the maximum and step back
+		in[15]=0x968;
+
+		int it=0;
+		do {
+			in[15]-=8;
+			it++;
+		} while (((uint8_t*)in)[44-it]==0);
+		((uint8_t*)in)[44-it+1]=0x80;   // padding marker after the last data byte
+
+		// word 37 was scanned in raw byte order; swap it now like the others
+		((uint64_t*)in)[5]= cuda_swab32ll(((uint64_t*)in)[5]);
+
+		sha2_round_body(in,buf,K);
+
+		const uint32_t nounce = startNounce + thread;
+
+		// candidate when the top 64-bit word does not exceed the target's
+		if (cuda_swab32ll(((uint64_t*)buf)[3]) <= pTarget[3])
+			atomicMin(&resNounce[0], nounce);
+	} // threads
+}
+
+
+
+/*
+ * One-time setup for the M7 SHA-256 stage of miner thread thr_id.
+ *
+ * Uploads the hash tables cpu_H256 and cpu_K to the device symbols H256 and
+ * K, then allocates one device word (d_MNonce[thr_id]) and one pinned host
+ * word (d_mnounce[thr_id]) used to return the result nonce.  `threads` is
+ * not used.  None of the CUDA return codes are inspected.
+ */
+__host__ void m7_sha256_cpu_init(int thr_id, int threads)
+{
+	const size_t nonceBytes = sizeof(uint32_t);
+
+	// copy the hash tables into GPU memory
+	cudaMemcpyToSymbol(H256, cpu_H256, sizeof(cpu_H256), 0, cudaMemcpyHostToDevice);
+	cudaMemcpyToSymbol(K, cpu_K, sizeof(cpu_K), 0, cudaMemcpyHostToDevice);
+
+	// result nonce: device slot and its pinned host mirror
+	cudaMalloc(&d_MNonce[thr_id], nonceBytes);
+	cudaMallocHost(&d_mnounce[thr_id], nonceBytes);
+}
+
+
+/*
+ * Host launcher for m7_sha256_gpu_hash_300.
+ *
+ * thr_id        - miner thread index; selects the result buffers
+ * threads       - number of nonces to test in this launch
+ * startNounce   - first nonce; thread i tests startNounce + i
+ * d_nonceVector - forwarded to the kernel, which does not use it
+ * d_hash        - device input buffer read by the kernel
+ * order         - forwarded to MyStreamSynchronize
+ *
+ * Returns the smallest nonce reported by the kernel, or 0xffffffff when no
+ * thread reported one (the device result word is preset to all ones).
+ *
+ * The block count is rounded up so that a `threads` value that is not a
+ * multiple of 512 (or is smaller than 512) is still fully covered; the
+ * kernel's own `thread < threads` check masks the surplus threads.
+ */
+__host__  uint32_t m7_sha256_cpu_hash_300(int thr_id, int threads, uint32_t startNounce, uint64_t *d_nonceVector,uint64_t *d_hash, int order)
+{
+	const int threadsperblock = 512;
+
+	// preset the device result word to 0xffffffff ("nothing found")
+	cudaMemset(d_MNonce[thr_id], 0xff, sizeof(uint32_t));
+
+	// number of thread blocks needed, rounded up
+	dim3 grid((threads + threadsperblock - 1) / threadsperblock);
+	dim3 block(threadsperblock);
+	size_t shared_size = 0;
+
+	m7_sha256_gpu_hash_300<<<grid, block, shared_size>>>(threads, startNounce, d_hash, d_nonceVector, d_MNonce[thr_id]);
+
+	// fetch the result word into the pinned host buffer
+	cudaMemcpy(d_mnounce[thr_id], d_MNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost);
+	MyStreamSynchronize(NULL, order, thr_id);
+
+	return *d_mnounce[thr_id];
+}
+
+
+/*
+ * Host launcher for m7_sha256_gpu_hash_120.
+ *
+ * thr_id       - miner thread index, forwarded to MyStreamSynchronize
+ * threads      - number of nonces to process in this launch
+ * startNounce  - first nonce; thread i hashes startNounce + i
+ * d_outputHash - device buffer the kernel writes to at
+ *                index i * threads + thread for i = 0..3
+ * order        - forwarded to MyStreamSynchronize
+ *
+ * The block size of 512 matches the kernel's __launch_bounds__(512,1).
+ * The block count is rounded up so that a `threads` value that is not a
+ * multiple of 512 (or is smaller than 512) is still fully covered; the
+ * kernel's own `thread < threads` check masks the surplus threads.
+ */
+__host__ void m7_sha256_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order)
+{
+	// Original note: alignment with the mixtab size, DO NOT CHANGE.
+	const int threadsperblock = 512;
+
+	// number of thread blocks needed, rounded up
+	dim3 grid((threads + threadsperblock - 1) / threadsperblock);
+	dim3 block(threadsperblock);
+	size_t shared_size = 0;
+
+	m7_sha256_gpu_hash_120<<<grid, block, shared_size>>>(threads, startNounce, d_outputHash);
+
+	MyStreamSynchronize(NULL, order, thr_id);
+}
+
+/*
+ * Stages a new block header for the M7 SHA-256 kernels.
+ *
+ * pdata   - at least 122 readable bytes of header data
+ * ptarget - at least 32 readable bytes (four 64-bit target words)
+ *
+ * Builds a 128-byte buffer (122 header bytes, 0x80, five zero bytes) and
+ * uploads it to c_PaddedMessage80, uploads the target to pTarget, then
+ * compresses the first 64 bytes on the host and uploads the resulting
+ * 8-word state to pbuf so the kernel can resume from it.
+ */
+__host__ void m7_sha256_setBlock_120(void *pdata,const void *ptarget)  //not useful
+{
+	unsigned char PaddedMessage[128];
+
+	memcpy(PaddedMessage, pdata, 122);
+	PaddedMessage[122] = 0x80;               // padding marker
+	memset(PaddedMessage + 123, 0, 5);       // original note: "useless"
+
+	cudaMemcpyToSymbol(c_PaddedMessage80, PaddedMessage, sizeof(PaddedMessage), 0, cudaMemcpyHostToDevice);
+	cudaMemcpyToSymbol(pTarget, ptarget, 4*sizeof(uint64_t), 0, cudaMemcpyHostToDevice);
+
+	// first compression is nonce-independent: do it once here
+	const uint32_t *headerWords = (const uint32_t*) PaddedMessage;
+	uint32_t firstBlock[16];
+	uint32_t midstate[8];
+
+	for (int w = 0; w < 16; w++)
+		firstBlock[w] = host_swab32(headerWords[w]);
+	memcpy(midstate, cpu_H256, sizeof(midstate));
+
+	sha2_round_body_host(firstBlock, midstate, cpu_K);
+
+	cudaMemcpyToSymbol(pbuf, midstate, sizeof(midstate), 0, cudaMemcpyHostToDevice);
+}
diff --git a/x13/cuda_mul.cu b/x13/cuda_mul.cu
new file mode 100644
index 0000000000..53794e041d
--- /dev/null
+++ b/x13/cuda_mul.cu
@@ -0,0 +1,366 @@
+/*
+ * M7 big-number multiplication (bigmul) - djm34
+ * 
+ */
+
+/*
+ * Multi-word multiplication kernels for M7 (m7_bigmul_*).
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2014  djm34
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author   phm <phm@inbox.com>
+ */
+#include <cuda.h>
+#include "cuda_runtime.h"
+#include "device_launch_parameters.h"
+
+
+#include <stdio.h>
+#include <stdint.h>
+#include <memory.h>
+
+
+
+#include "cuda_helper.h"
+
+
+// from heavy.cu
+
+extern int device_major[8];
+extern int device_minor[8];
+extern int compute_version[8];
+extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
+
+/*
+ * Per-thread multi-word multiply-accumulate shared by the m7_bigmul_unroll1
+ * kernels.
+ *
+ * All three buffers are indexed as [word * threads + thread]: word k of this
+ * thread lives at offset k*threads + thread.
+ *
+ *   am : three 64-bit words are read and split into six 32-bit values B0..B5
+ *   bm : 32 64-bit words are read, one per loop iteration (split into Q0, Q1)
+ *   w  : 35 64-bit words are first cleared, then accumulated into
+ *
+ * LOHI and MAKE_ULONGLONG are not defined in this file; presumably they split
+ * a 64-bit value into (low, high) 32-bit halves and rebuild it - confirm in
+ * cuda_helper.h.
+ */
+static __forceinline__ __device__ void mul_unroll1_core_test(int threads, int thread, uint64_t* am, uint64_t* bm, uint64_t *w)
+{
+
+	uint32_t B0, B1, B2, B3, B4, B5;
+	LOHI(B0, B1, am[thread]);
+	LOHI(B2, B3, am[threads + thread]);
+	LOHI(B4, B5, am[2 * threads + thread]);
+
+
+
+	// Clear this thread's 35 result words before accumulating.
+#pragma unroll
+	for (int i = 0; i<35; i++) { w[i*threads + thread] = 0; }
+#if __CUDA_ARCH__ < 500
+#pragma unroll    
+#endif
+	for (int i = 0; i<32; i++) {
+		uint32_t Q0;
+		uint32_t Q1;
+		LOHI(Q0, Q1, bm[i*threads + thread]);
+		//		uint32_t W0,W1,W2,W3,W4,W5,W6,W7;
+		// Window of four result words (eight 32-bit values) starting at word i.
+		uint4 Wa, Wb;
+		LOHI(Wa.x, Wa.y, w[i*threads + thread]);
+		LOHI(Wa.z, Wa.w, w[(i + 1)*threads + thread]);
+		LOHI(Wb.x, Wb.y, w[(i + 2)*threads + thread]);
+		LOHI(Wb.z, Wb.w, w[(i + 3)*threads + thread]);
+
+
+		// Adds (B5..B0) * Q0 into Wa.x..Wb.y with carry propagation. The
+		// incoming value of the top operand (Wb.z) is NOT added: it is
+		// overwritten with the last high product plus carries.
+		asm("{\n\t"
+			".reg .u32 b0,b1; \n\t"
+			"mad.lo.cc.u32      b0,%7,%13,%0; \n\t"
+			"madc.hi.cc.u32     b1,%7,%13,0; \n\t"
+			"mov.u32 %0,b0; \n\t"
+			"madc.lo.cc.u32  b1,%8,%13,b1; \n\t"
+			"madc.hi.cc.u32 b0,%8,%13,0; \n\t"
+			"add.cc.u32      b1,b1,%1;      \n\t"
+			"mov.u32 %1,b1; \n\t"
+			"madc.lo.cc.u32 b0,%9,%13,b0; \n\t"
+			"madc.hi.cc.u32 b1,%9,%13,0; \n\t"
+			"add.cc.u32      b0,b0,%2;      \n\t"
+			"mov.u32 %2,b0; \n\t"
+			"madc.lo.cc.u32 b1,%10,%13,b1; \n\t"
+			"madc.hi.cc.u32 b0,%10,%13,0; \n\t"
+			"add.cc.u32      b1,b1,%3;      \n\t"
+			"mov.u32 %3,b1; \n\t"
+			"madc.lo.cc.u32 b0,%11,%13,b0; \n\t"
+			"madc.hi.cc.u32 b1,%11,%13,0; \n\t"
+			"add.cc.u32      b0,b0,%4;      \n\t"
+			"mov.u32 %4,b0; \n\t"
+			"madc.lo.cc.u32 b1,%12,%13,b1; \n\t"
+			"madc.hi.cc.u32 %6,%12,%13,0; \n\t"
+			"add.cc.u32      b1,b1,%5;      \n\t"
+			"addc.u32     %6,%6,0;   \n\t"
+			"mov.u32 %5,b1; \n\t"
+			"}\n\t"
+			: "+r"(Wa.x), "+r"(Wa.y), "+r"(Wa.z), "+r"(Wa.w), "+r"(Wb.x), "+r"(Wb.y), "+r"(Wb.z)
+			: "r"(B0), "r"(B1), "r"(B2), "r"(B3), "r"(B4), "r"(B5), "r"(Q0));
+		///////////////////////////
+		// Same instruction sequence shifted up by one 32-bit value, using Q1;
+		// here Wb.w is the operand that gets overwritten.
+		asm("{\n\t"
+			".reg .u32 b0,b1; \n\t"
+			"mad.lo.cc.u32      b0,%7,%13,%0; \n\t"
+			"madc.hi.cc.u32     b1,%7,%13,0; \n\t"
+			"mov.u32 %0,b0; \n\t"
+			"madc.lo.cc.u32  b1,%8,%13,b1; \n\t"
+			"madc.hi.cc.u32 b0,%8,%13,0; \n\t"
+			"add.cc.u32      b1,b1,%1;      \n\t"
+			"mov.u32 %1,b1; \n\t"
+			"madc.lo.cc.u32 b0,%9,%13,b0; \n\t"
+			"madc.hi.cc.u32 b1,%9,%13,0; \n\t"
+			"add.cc.u32      b0,b0,%2;      \n\t"
+			"mov.u32 %2,b0; \n\t"
+			"madc.lo.cc.u32 b1,%10,%13,b1; \n\t"
+			"madc.hi.cc.u32 b0,%10,%13,0; \n\t"
+			"add.cc.u32      b1,b1,%3;      \n\t"
+			"mov.u32 %3,b1; \n\t"
+			"madc.lo.cc.u32 b0,%11,%13,b0; \n\t"
+			"madc.hi.cc.u32 b1,%11,%13,0; \n\t"
+			"add.cc.u32      b0,b0,%4;      \n\t"
+			"mov.u32 %4,b0; \n\t"
+			"madc.lo.cc.u32 b1,%12,%13,b1; \n\t"
+			"madc.hi.cc.u32 %6,%12,%13,0; \n\t"
+			"add.cc.u32      b1,b1,%5;      \n\t"
+			"addc.u32     %6,%6,0;   \n\t"
+			"mov.u32 %5,b1; \n\t"
+			"}\n\t"
+			: "+r"(Wa.y), "+r"(Wa.z), "+r"(Wa.w), "+r"(Wb.x), "+r"(Wb.y), "+r"(Wb.z), "+r"(Wb.w)
+			: "r"(B0), "r"(B1), "r"(B2), "r"(B3), "r"(B4), "r"(B5), "r"(Q1));
+
+		// Write the updated four-word window back.
+		w[i*threads + thread] = MAKE_ULONGLONG(Wa.x, Wa.y);
+		w[(i + 1)*threads + thread] = MAKE_ULONGLONG(Wa.z, Wa.w);
+		w[(i + 2)*threads + thread] = MAKE_ULONGLONG(Wb.x, Wb.y);
+		w[(i + 3)*threads + thread] = MAKE_ULONGLONG(Wb.z, Wb.w);
+
+
+
+	}
+
+}
+
+/*
+ * Per-thread multi-word multiply-accumulate used by the m7_bigmul_unroll2
+ * kernels. Same instruction sequence as mul_unroll1_core_test, with larger
+ * word counts.
+ *
+ * All three buffers are indexed as [word * threads + thread]: word k of this
+ * thread lives at offset k*threads + thread.
+ *
+ *   am : three 64-bit words are read and split into six 32-bit values B0..B5
+ *   bm : 35 64-bit words are read, one per loop iteration (split into Q0, Q1)
+ *   w  : 38 64-bit words are first cleared, then accumulated into
+ *
+ * LOHI and MAKE_ULONGLONG are not defined in this file; presumably they split
+ * a 64-bit value into (low, high) 32-bit halves and rebuild it - confirm in
+ * cuda_helper.h.
+ */
+static __forceinline__ __device__ void mul_unroll2_core_test(int threads, int thread, uint64_t* am, uint64_t* bm, uint64_t *w)
+{
+
+	uint32_t B0, B1, B2, B3, B4, B5;
+	LOHI(B0, B1, am[thread]);
+	LOHI(B2, B3, am[threads + thread]);
+	LOHI(B4, B5, am[2 * threads + thread]);
+
+
+
+	// Clear this thread's 38 result words before accumulating.
+#pragma unroll
+	for (int i = 0; i<38; i++) { w[i*threads + thread] = 0; }
+#if __CUDA_ARCH__ < 500
+#pragma unroll    
+#endif
+	for (int i = 0; i<35; i++) {
+		uint32_t Q0;
+		uint32_t Q1;
+		LOHI(Q0, Q1, bm[i*threads + thread]);
+		//		uint32_t W0, W1, W2, W3, W4, W5, W6, W7;
+		// Window of four result words (eight 32-bit values) starting at word i.
+		uint4 Wa, Wb;
+		LOHI(Wa.x, Wa.y, w[i*threads + thread]);
+		LOHI(Wa.z, Wa.w, w[(i + 1)*threads + thread]);
+		LOHI(Wb.x, Wb.y, w[(i + 2)*threads + thread]);
+		LOHI(Wb.z, Wb.w, w[(i + 3)*threads + thread]);
+
+
+		// Adds (B5..B0) * Q0 into Wa.x..Wb.y with carry propagation. The
+		// incoming value of the top operand (Wb.z) is NOT added: it is
+		// overwritten with the last high product plus carries.
+		asm("{\n\t"
+			".reg .u32 b0,b1; \n\t"
+			"mad.lo.cc.u32      b0,%7,%13,%0; \n\t"
+			"madc.hi.cc.u32     b1,%7,%13,0; \n\t"
+			"mov.u32 %0,b0; \n\t"
+			"madc.lo.cc.u32  b1,%8,%13,b1; \n\t"
+			"madc.hi.cc.u32 b0,%8,%13,0; \n\t"
+			"add.cc.u32      b1,b1,%1;      \n\t"
+			"mov.u32 %1,b1; \n\t"
+			"madc.lo.cc.u32 b0,%9,%13,b0; \n\t"
+			"madc.hi.cc.u32 b1,%9,%13,0; \n\t"
+			"add.cc.u32      b0,b0,%2;      \n\t"
+			"mov.u32 %2,b0; \n\t"
+			"madc.lo.cc.u32 b1,%10,%13,b1; \n\t"
+			"madc.hi.cc.u32 b0,%10,%13,0; \n\t"
+			"add.cc.u32      b1,b1,%3;      \n\t"
+			"mov.u32 %3,b1; \n\t"
+			"madc.lo.cc.u32 b0,%11,%13,b0; \n\t"
+			"madc.hi.cc.u32 b1,%11,%13,0; \n\t"
+			"add.cc.u32      b0,b0,%4;      \n\t"
+			"mov.u32 %4,b0; \n\t"
+			"madc.lo.cc.u32 b1,%12,%13,b1; \n\t"
+			"madc.hi.cc.u32 %6,%12,%13,0; \n\t"
+			"add.cc.u32      b1,b1,%5;      \n\t"
+			"addc.u32     %6,%6,0;   \n\t"
+			"mov.u32 %5,b1; \n\t"
+			"}\n\t"
+			: "+r"(Wa.x), "+r"(Wa.y), "+r"(Wa.z), "+r"(Wa.w), "+r"(Wb.x), "+r"(Wb.y), "+r"(Wb.z)
+			: "r"(B0), "r"(B1), "r"(B2), "r"(B3), "r"(B4), "r"(B5), "r"(Q0));
+		///////////////////////////
+		// Same instruction sequence shifted up by one 32-bit value, using Q1;
+		// here Wb.w is the operand that gets overwritten.
+		asm("{\n\t"
+			".reg .u32 b0,b1; \n\t"
+			"mad.lo.cc.u32      b0,%7,%13,%0; \n\t"
+			"madc.hi.cc.u32     b1,%7,%13,0; \n\t"
+			"mov.u32 %0,b0; \n\t"
+			"madc.lo.cc.u32  b1,%8,%13,b1; \n\t"
+			"madc.hi.cc.u32 b0,%8,%13,0; \n\t"
+			"add.cc.u32      b1,b1,%1;      \n\t"
+			"mov.u32 %1,b1; \n\t"
+			"madc.lo.cc.u32 b0,%9,%13,b0; \n\t"
+			"madc.hi.cc.u32 b1,%9,%13,0; \n\t"
+			"add.cc.u32      b0,b0,%2;      \n\t"
+			"mov.u32 %2,b0; \n\t"
+			"madc.lo.cc.u32 b1,%10,%13,b1; \n\t"
+			"madc.hi.cc.u32 b0,%10,%13,0; \n\t"
+			"add.cc.u32      b1,b1,%3;      \n\t"
+			"mov.u32 %3,b1; \n\t"
+			"madc.lo.cc.u32 b0,%11,%13,b0; \n\t"
+			"madc.hi.cc.u32 b1,%11,%13,0; \n\t"
+			"add.cc.u32      b0,b0,%4;      \n\t"
+			"mov.u32 %4,b0; \n\t"
+			"madc.lo.cc.u32 b1,%12,%13,b1; \n\t"
+			"madc.hi.cc.u32 %6,%12,%13,0; \n\t"
+			"add.cc.u32      b1,b1,%5;      \n\t"
+			"addc.u32     %6,%6,0;   \n\t"
+			"mov.u32 %5,b1; \n\t"
+			"}\n\t"
+			: "+r"(Wa.y), "+r"(Wa.z), "+r"(Wa.w), "+r"(Wb.x), "+r"(Wb.y), "+r"(Wb.z), "+r"(Wb.w)
+			: "r"(B0), "r"(B1), "r"(B2), "r"(B3), "r"(B4), "r"(B5), "r"(Q1));
+
+		// Write the updated four-word window back.
+		w[i*threads + thread] = MAKE_ULONGLONG(Wa.x, Wa.y);
+		w[(i + 1)*threads + thread] = MAKE_ULONGLONG(Wa.z, Wa.w);
+		w[(i + 2)*threads + thread] = MAKE_ULONGLONG(Wb.x, Wb.y);
+		w[(i + 3)*threads + thread] = MAKE_ULONGLONG(Wb.z, Wb.w);
+
+
+
+	}
+
+}
+
+
+/*
+ * Generic unroll1 kernel (launch bounds 512 threads, 3 blocks): each thread
+ * whose global index is below `threads` runs mul_unroll1_core_test on its
+ * own slice of am / bm / w.
+ */
+__global__ void __launch_bounds__(512, 3) m7_bigmul_unroll1_gpu(int threads, uint64_t* am, uint64_t* bm, uint64_t *w)
+{
+	const int gid = (blockDim.x * blockIdx.x + threadIdx.x);
+
+	// Surplus threads of the last block have nothing to do.
+	if (gid >= threads)
+		return;
+
+	mul_unroll1_core_test(threads, gid, am, bm, w);
+}
+
+/*
+ * unroll1 kernel variant compiled with launch bounds (256, 2): each thread
+ * whose global index is below `threads` runs mul_unroll1_core_test.
+ */
+__global__ void __launch_bounds__(256, 2) m7_bigmul_unroll1_gpu_50(int threads, uint64_t* am, uint64_t* bm, uint64_t *w)
+{
+	const int gid = (blockDim.x * blockIdx.x + threadIdx.x);
+
+	// Surplus threads of the last block have nothing to do.
+	if (gid >= threads)
+		return;
+
+	mul_unroll1_core_test(threads, gid, am, bm, w);
+}
+
+/*
+ * unroll1 kernel variant compiled with launch bounds (256, 4): each thread
+ * whose global index is below `threads` runs mul_unroll1_core_test.
+ */
+__global__ void __launch_bounds__(256, 4) m7_bigmul_unroll1_gpu_80(int threads, uint64_t* am, uint64_t* bm, uint64_t *w)
+{
+	const int gid = (blockDim.x * blockIdx.x + threadIdx.x);
+
+	// Surplus threads of the last block have nothing to do.
+	if (gid >= threads)
+		return;
+
+	mul_unroll1_core_test(threads, gid, am, bm, w);
+}
+
+
+/*
+ * Generic unroll2 kernel (launch bounds 512 threads, 2 blocks): each thread
+ * whose global index is below `threads` runs mul_unroll2_core_test on its
+ * own slice of am / bm / w.
+ */
+__global__ void __launch_bounds__(512, 2) m7_bigmul_unroll2_gpu(int threads, uint64_t* am, uint64_t* bm, uint64_t *w)
+{
+	const int gid = (blockDim.x * blockIdx.x + threadIdx.x);
+
+	// Surplus threads of the last block have nothing to do.
+	if (gid >= threads)
+		return;
+
+	mul_unroll2_core_test(threads, gid, am, bm, w);
+}
+
+/*
+ * unroll2 kernel variant (launch bounds 512 threads, 2 blocks): each thread
+ * whose global index is below `threads` runs mul_unroll2_core_test.
+ */
+__global__ void __launch_bounds__(512, 2) m7_bigmul_unroll2_gpu_50(int threads, uint64_t* am, uint64_t* bm, uint64_t *w)
+{
+	const int gid = (blockDim.x * blockIdx.x + threadIdx.x);
+
+	// Surplus threads of the last block have nothing to do.
+	if (gid >= threads)
+		return;
+
+	mul_unroll2_core_test(threads, gid, am, bm, w);
+}
+
+
+
+
+/*
+ * Host launcher for the unroll1 multiplication kernels.
+ *
+ * Picks the block size and the kernel variant from compute_version[thr_id]:
+ *   == 50 -> m7_bigmul_unroll1_gpu_50  (256 threads per block)
+ *   == 52 -> m7_bigmul_unroll1_gpu_80  (256 threads per block)
+ *   other -> m7_bigmul_unroll1_gpu     (256 threads per block when the
+ *            version is >= 50, otherwise 512)
+ *
+ * Hash1 / Hash2 / finalHash are handed to the kernel as am / bm / w.
+ * `order` is not used and the launch is not synchronised here.
+ */
+__host__ void m7_bigmul_unroll1_cpu(int thr_id, int threads, uint64_t* Hash1, uint64_t* Hash2, uint64_t *finalHash, int order)
+{
+	const int version = compute_version[thr_id];
+	const int perBlock = (version >= 50) ? 256 : 512;
+
+	const dim3 block(perBlock);
+	const dim3 grid((threads + perBlock - 1) / perBlock);	// rounded up
+	const size_t shared_size = 0;
+
+	switch (version) {
+	case 50:
+		m7_bigmul_unroll1_gpu_50<<<grid, block, shared_size>>>(threads, Hash1, Hash2, finalHash);
+		break;
+	case 52:
+		m7_bigmul_unroll1_gpu_80<<<grid, block, shared_size>>>(threads, Hash1, Hash2, finalHash);
+		break;
+	default:
+		m7_bigmul_unroll1_gpu<<<grid, block, shared_size>>>(threads, Hash1, Hash2, finalHash);
+		break;
+	}
+}
+
+/*
+ * Host launcher for the unroll2 multiplication kernels.
+ *
+ * Uses 512 threads per block and a rounded-up grid. Devices whose
+ * compute_version[thr_id] is >= 50 get m7_bigmul_unroll2_gpu_50, all others
+ * m7_bigmul_unroll2_gpu. (Previously both branches launched the generic
+ * kernel, leaving the _50 variant unused; both kernels run
+ * mul_unroll2_core_test, so results are unchanged.)
+ *
+ * Hash1 / Hash2 / finalHash are handed to the kernel as am / bm / w.
+ * `order` is not used and the launch is not synchronised here.
+ */
+__host__ void m7_bigmul_unroll2_cpu(int thr_id, int threads, uint64_t* Hash1, uint64_t* Hash2, uint64_t *finalHash, int order)
+{
+
+	const int threadsperblock = 512;
+
+	dim3 grid((threads + threadsperblock - 1) / threadsperblock);
+	dim3 block(threadsperblock);
+
+	size_t shared_size = 0;
+
+	if (compute_version[thr_id] >= 50) {
+		m7_bigmul_unroll2_gpu_50<<<grid, block, shared_size>>>(threads, Hash1, Hash2, finalHash);
+	}
+	else {
+		m7_bigmul_unroll2_gpu<<<grid, block, shared_size>>>(threads, Hash1, Hash2, finalHash);
+	}
+
+}
+
+
+
+
+/*
+ * Initialisation hook for the bigmul kernels. It currently does nothing;
+ * both arguments are ignored.
+ */
+__host__ void m7_bigmul_init(int thr_id, int threads)
+{
+	(void)thr_id;
+	(void)threads;
+}
\ No newline at end of file
diff --git a/x13/cuda_mul2.cu b/x13/cuda_mul2.cu
new file mode 100644
index 0000000000..a3e587cce2
--- /dev/null
+++ b/x13/cuda_mul2.cu
@@ -0,0 +1,459 @@
+/*
+ * multi-limb multiplication - djm34, catia
+ * 
+ */
+
+/*
+ * Multi-limb (big-number) multiplication kernel implementation.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2014  djm34
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author   phm <phm@inbox.com>
+ */
+
+#undef _GLIBCXX_ATOMIC_BUILTINS
+#undef _GLIBCXX_USE_INT128
+
+#include <cuda.h>
+#include "cuda_runtime.h"
+#include "device_launch_parameters.h"
+
+
+#include <stdio.h>
+#include <stdint.h>
+#include <memory.h>
+
+
+#include "cuda_helper.h"
+
+extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
+
+
+
+/* 128-bit unsigned value stored as two 64-bit halves. */
+typedef struct t4_t {
+	uint64_t high;	// upper 64 bits
+	uint64_t low;	// lower 64 bits
+} t4_t;
+
+/*
+ * Full 64 x 64 -> 128 bit unsigned multiplication.
+ *
+ * Returns the product of a and b with the low 64 bits in .x and the high
+ * 64 bits in .y.
+ */
+__device__ __forceinline__ 
+ulonglong2 umul64wide (unsigned long long int a, 
+                       unsigned long long int b)
+{
+    ulonglong2 product;
+    product.x = a * b;              // low half (wraps modulo 2^64)
+    product.y = __umul64hi(a, b);   // high half of the 128-bit product
+    return product;
+}
+
+/*
+ * umul_ppmm(h, l, m, n): (h:l) = m * n as a 128-bit product, h receiving the
+ * high and l the low 64 bits (computed by umul64wide).
+ *
+ * Wrapped in do { } while (0) so it behaves as a single statement (safe in
+ * an unbraced if/else); arguments are parenthesised and the temporary has a
+ * name that is unlikely to collide with a caller's variable. Use with a
+ * trailing semicolon.
+ */
+#define umul_ppmm(h,l,m,n) \
+do { \
+	const ulonglong2 umul_ppmm_prod_ = umul64wide((m),(n)); \
+	(h) = umul_ppmm_prod_.y; \
+	(l) = umul_ppmm_prod_.x; \
+} while (0)
+
+
+/*
+ * Full 128 x 128 -> 256 bit unsigned multiplication.
+ *
+ *   m, n : 128-bit factors
+ *   l    : receives bits   0..127 of the product
+ *   h    : receives bits 128..255 of the product
+ *
+ * Built from four 64 x 64 -> 128 products (umul64wide) with explicit carry
+ * tracking between the 64-bit result words. The previous inline-PTX version
+ * used `addc.u32` without `.cc` at several points of the carry chain, which
+ * drops the carry-out whenever the word being incremented is 0xFFFFFFFF.
+ * Inputs are taken by value and results are stored only at the end.
+ */
+__device__ __forceinline__ void umul_ppmmT4(t4_t *h, t4_t *l, t4_t m, t4_t n)
+{
+	const ulonglong2 ll = umul64wide(m.low,  n.low);	// weight 2^0
+	const ulonglong2 lh = umul64wide(m.low,  n.high);	// weight 2^64
+	const ulonglong2 hl = umul64wide(m.high, n.low);	// weight 2^64
+	const ulonglong2 hh = umul64wide(m.high, n.high);	// weight 2^128
+
+	// Result word 1 (bits 64..127): three addends, carry1 is 0..2.
+	uint64_t word1 = ll.y + lh.x;
+	uint64_t carry1 = (word1 < lh.x) ? 1 : 0;
+	word1 += hl.x;
+	carry1 += (word1 < hl.x) ? 1 : 0;
+
+	// Result word 2 (bits 128..191): three addends plus carry1.
+	uint64_t word2 = lh.y + hl.y;
+	uint64_t carry2 = (word2 < hl.y) ? 1 : 0;
+	word2 += hh.x;
+	carry2 += (word2 < hh.x) ? 1 : 0;
+	word2 += carry1;
+	carry2 += (word2 < carry1) ? 1 : 0;
+
+	// Result word 3 (bits 192..255): cannot overflow, the product is < 2^256.
+	const uint64_t word3 = hh.y + carry2;
+
+	l->low  = ll.x;
+	l->high = word1;
+	h->low  = word2;
+	h->high = word3;
+}
+
+#if 0
+/*
+ * Disabled (never compiled) alternative implementation of umul_ppmmT4 that
+ * builds the 128 x 128 -> 256 bit product from four umul_ppmm calls and
+ * tracks the carries by hand (c, c2).
+ */
+__device__ __forceinline__ void umul_ppmmT4(t4_t *h, t4_t *l, t4_t m, t4_t n){
+	uint64_t th,tl;
+	uint32_t c,c2;
+	// First word: m.low * n.low fills l.
+	umul_ppmm(l->high,l->low,m.low,n.low);
+
+	// m.high * n.low: low part is added to l->high, high part starts h.
+	umul_ppmm(th,tl,m.high,n.low);
+	l->high += tl;
+	c = (l->high < tl);
+	h->low = th + c;
+	c = (h->low < c);
+	h->high = c;
+
+	//Second word
+	umul_ppmm(th,tl,m.low,n.high);
+	l->high += tl;
+	c = l->high < tl;
+	h->low += th;
+	c2 = h->low < th;
+	h->low += c;
+	c2 += h->low < c;
+	h->high += c2;
+
+	// m.high * n.high is added into h.
+	umul_ppmm(th,tl,m.high,n.high);
+	h->low += tl;
+	c = h->low < tl;
+	h->high += th + c;
+}
+#endif
+
+
+/*
+ * Loads 128-bit element `idx` of `thread` from g.
+ * The low half is read from 64-bit slot (2*idx)*threads + thread, the high
+ * half from slot (2*idx + 1)*threads + thread.
+ */
+__device__ __forceinline__ t4_t T4(uint32_t thread, uint32_t threads, uint32_t idx, uint64_t *g){
+	const uint32_t lowSlot  = (idx*2)*threads + thread;
+	const uint32_t highSlot = (idx*2 + 1)*threads + thread;
+
+	t4_t value;
+	value.low  = g[lowSlot];
+	value.high = g[highSlot];
+	return value;
+}
+
+/*
+ * Stores val as 128-bit element `idx` of `thread` in g.
+ * The low half goes to 64-bit slot (2*idx)*threads + thread, the high half
+ * to slot (2*idx + 1)*threads + thread.
+ */
+__device__ __forceinline__ void T4_store(uint32_t thread, uint32_t threads, uint32_t idx, uint64_t *g, t4_t val){
+	const uint32_t lowSlot  = (idx*2)*threads + thread;
+	const uint32_t highSlot = (idx*2 + 1)*threads + thread;
+
+	g[highSlot] = val.high;
+	g[lowSlot]  = val.low;
+}
+
+/* Sets *d to the 64-bit value v, zero-extended to 128 bits. */
+__device__ __forceinline__ void T4_set(t4_t *d, uint64_t v){
+	d->low  = v;
+	d->high = 0;
+}
+
+/* Returns a + b modulo 2^128; the carry out of the low half feeds the high half. */
+__device__ __forceinline__ t4_t T4_add(t4_t a, t4_t b){
+	t4_t sum;
+	sum.low = a.low + b.low;
+	// A wrapped low-half sum is smaller than either addend.
+	const uint32_t carry = (sum.low < a.low) ? 1 : 0;
+	sum.high = a.high + b.high + carry;
+	return sum;
+}
+
+/* Returns a + b modulo 2^128, where a is a 64-bit value. */
+__device__ __forceinline__ t4_t T4_add(uint64_t a, t4_t b){
+	t4_t sum;
+	sum.low = a + b.low;
+	// A wrapped low-half sum is smaller than either addend.
+	const uint32_t carry = (sum.low < a) ? 1 : 0;
+	sum.high = b.high + carry;
+	return sum;
+}
+
+
+/* Returns 1 when a < b as 128-bit unsigned values, otherwise 0. */
+__device__ __forceinline__ uint32_t T4_lt(t4_t a, t4_t b){
+	// The high halves decide unless they are equal.
+	if (a.high != b.high)
+		return (a.high < b.high) ? 1 : 0;
+	return (a.low < b.low) ? 1 : 0;
+}
+
+/* Returns 1 when the 128-bit value a is greater than the 64-bit value b, otherwise 0. */
+__device__ __forceinline__ uint32_t T4_gt(t4_t a, uint64_t b){
+	// Any non-zero high half already exceeds every 64-bit value.
+	return (a.high != 0 || a.low > b) ? 1 : 0;
+}
+
+
+/*
+ * g_p = g_v * sml for one thread, working on 128-bit elements.
+ *
+ *   len   number of 128-bit elements read from g_v
+ *   g_p   receives len + 1 elements (element `len` is the final carry)
+ *   sml   128-bit multiplier
+ *   size  receives len, plus one when the final carry is non-zero
+ *
+ * Elements are accessed through T4 / T4_store.
+ */
+__device__ void mulScalarT4(uint32_t thread, uint32_t threads, uint32_t len, uint64_t* g_p, uint64_t* g_v, t4_t sml, uint32_t *size){
+  t4_t carry;
+  T4_set(&carry, 0);
+
+  uint32_t limb = 0;
+  while (limb < len) {
+      t4_t prodHi, prodLo;
+      const t4_t operand = T4(thread, threads, limb, g_v);
+      umul_ppmmT4(&prodHi, &prodLo, operand, sml);
+
+      // Fold in the carry of the previous element; a wrap-around of the low
+      // part increments the next carry.
+      prodLo = T4_add(prodLo, carry);
+      const uint32_t wrapped = T4_lt(prodLo, carry);
+      carry = T4_add(wrapped, prodHi);
+
+      T4_store(thread, threads, limb, g_p, prodLo);
+      ++limb;
+  }
+
+  T4_store(thread, threads, len, g_p, carry);
+  *size = len + T4_gt(carry, 0);
+}
+
+
+/*
+ * g_p = g_v * sml for one thread, working on 64-bit limbs stored at
+ * [limb * threads + thread].
+ *
+ *   len   number of limbs read from g_v
+ *   g_p   receives len + 1 limbs (limb `len` is the final carry)
+ *   sml   64-bit multiplier
+ *   size  receives len, plus one when the final carry is non-zero
+ */
+__device__ void mulScalar(uint32_t thread, uint32_t threads, uint32_t len, uint64_t* g_p, uint64_t* g_v, uint64_t sml, uint32_t *size){
+  uint64_t carry = 0;
+
+  for (uint32_t limb = 0; limb < len; ++limb) {
+      const uint64_t operand = g_v[limb*threads + thread];
+      uint64_t prodHi, prodLo;
+      umul_ppmm(prodHi, prodLo, operand, sml);
+
+      // Fold in the previous carry; a wrap-around bumps the next carry.
+      prodLo += carry;
+      carry = (prodLo < carry) + prodHi;
+
+      g_p[limb*threads + thread] = prodLo;
+  }
+
+  g_p[len*threads + thread] = carry;
+  *size = len + (carry != 0);
+}
+
+/*
+ * sum[sofst .. sofst + xsz - 1] += x[0 .. xsz - 1] * a for one thread, on
+ * 64-bit limbs stored at [limb * threads + thread].
+ *
+ * Returns the carry out of the highest limb that was updated.
+ */
+uint64_t __device__ addmul_1g (uint32_t thread, uint32_t threads, uint64_t *sum, uint32_t sofst, uint64_t *x, uint64_t xsz, uint64_t a){
+	uint64_t carry = 0;
+
+	for (uint32_t i = 0; i < xsz; ++i) {
+		const uint64_t operand = x[i*threads + thread];
+		uint64_t prodHi, prodLo;
+		umul_ppmm(prodHi, prodLo, operand, a);
+
+		// Fold in the carry from the previous limb.
+		prodLo += carry;
+		carry = (prodLo < carry) + prodHi;
+
+		// Accumulate into the destination limb.
+		const uint64_t previous = sum[(i+sofst) * threads + thread];
+		prodLo = previous + prodLo;
+		carry += prodLo < previous;
+		sum[(i+sofst)*threads + thread] = prodLo;
+	}
+
+	return carry;
+}
+
+/*
+ * sum[sofst .. sofst + xsz - 1] += x[0 .. xsz - 1] * a for one thread, on
+ * 128-bit elements accessed through T4 / T4_store.
+ *
+ * Returns the carry out of the highest element that was updated.
+ */
+t4_t __device__ addmul_1gT4 (uint32_t thread, uint32_t threads, uint64_t *sum, uint32_t sofst, uint64_t *x, uint64_t xsz, t4_t a){
+	t4_t carry;
+	T4_set(&carry, 0);
+
+	for (uint32_t i = 0; i < xsz; ++i) {
+		t4_t prodHi, prodLo;
+		const t4_t operand = T4(thread, threads, i, x);
+		umul_ppmmT4(&prodHi, &prodLo, operand, a);
+
+		// Fold in the carry from the previous element.
+		prodLo = T4_add(prodLo, carry);
+		carry = T4_add(T4_lt(prodLo, carry), prodHi);
+
+		// Accumulate into the destination element.
+		const t4_t previous = T4(thread, threads, i+sofst, sum);
+		prodLo = T4_add(previous, prodLo);
+		carry = T4_add(T4_lt(prodLo, previous), carry);
+		T4_store(thread, threads, i+sofst, sum, prodLo);
+	}
+
+	return carry;
+}
+
+
+
+/*
+ * Schoolbook multiplication g_p = g_u * g_v, one number pair per thread, on
+ * 64-bit limbs stored at [limb * threads + thread].
+ *
+ *   threads  number of number pairs; threads with a larger index do nothing
+ *   ulegs    limb count of g_u
+ *   vlegs    limb count of g_v
+ *   g_p      receives ulegs + vlegs limbs per thread
+ *
+ * The operands are swapped if needed so that the longer one is the inner
+ * operand. The first row is written by mulScalar; every further row is added
+ * with addmul_1g and its carry is stored in the next higher limb.
+ *
+ * Each row only reads limbs that mulScalar or an earlier row already wrote
+ * (up to index ulegs + row - 1), so no limb needs to be cleared first. The
+ * previous code cleared limb `psize` before each row; when the first row
+ * ended with a zero carry, that store wiped the carry written by the
+ * preceding row.
+ */
+__global__ void gpu_mul(int threads, uint32_t ulegs, uint32_t vlegs, uint64_t *g_u, uint64_t *g_v, uint64_t *g_p)
+{
+    int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+    if (thread < threads)
+    {
+	if(ulegs < vlegs){
+		const uint32_t tmpLegs = ulegs;
+		ulegs = vlegs;
+		vlegs = tmpLegs;
+
+		uint64_t *tmpPtr = g_u;
+		g_u = g_v;
+		g_v = tmpPtr;
+	}
+
+	// Row 0: g_p[0 .. ulegs] = g_u * g_v[0]. The reported size is not needed.
+	uint32_t psize = 0;
+	mulScalar(thread,threads,ulegs,g_p,g_u,g_v[thread],&psize);
+
+	// Rows 1 .. vlegs-1: g_p[row .. row+ulegs-1] += g_u * g_v[row];
+	// the carry becomes limb ulegs + row.
+	for (uint32_t row = 1; row < vlegs; row++) {
+		g_p[(ulegs+row)*threads + thread] = addmul_1g (thread, threads, g_p ,row , g_u, ulegs,  g_v[row*threads+thread]);
+	}
+    }
+}
+
+/*
+ * Schoolbook multiplication g_p = g_u * g_v, one number pair per thread, on
+ * 128-bit elements (pairs of 64-bit limbs accessed through T4 / T4_store).
+ *
+ *   threads  number of number pairs; threads with a larger index do nothing
+ *   ulegs    64-bit limb count of g_u (halved to get the element count)
+ *   vlegs    64-bit limb count of g_v (halved to get the element count)
+ *
+ * NOTE(review): the halving truncates, so odd limb counts lose their top
+ * limb - presumably callers always pass even counts; confirm.
+ *
+ * The operands are swapped if needed so that the longer one is the inner
+ * operand. The first row is written by mulScalarT4; every further row is
+ * added with addmul_1gT4 and its carry is stored in the next higher element.
+ *
+ * Each row only reads elements that mulScalarT4 or an earlier row already
+ * wrote, so nothing needs to be cleared first. The previous code cleared
+ * element `psize` before each row; when the first row ended with a zero
+ * carry, that store wiped the carry written by the preceding row.
+ */
+__global__ void  gpu_mulT4(int threads, uint32_t ulegs, uint32_t vlegs, uint64_t *g_u, uint64_t *g_v, uint64_t *g_p)
+{
+    int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+    if (thread < threads)
+    {
+
+	if(ulegs < vlegs){
+		const uint32_t tmpLegs = ulegs;
+		ulegs = vlegs;
+		vlegs = tmpLegs;
+
+		uint64_t *tmpPtr = g_u;
+		g_u = g_v;
+		g_v = tmpPtr;
+	}
+
+	// Switch from 64-bit limb counts to 128-bit element counts.
+	ulegs >>= 1; vlegs >>= 1;
+
+	// Row 0: g_p[0 .. ulegs] = g_u * g_v[0]. The reported size is not needed.
+	uint32_t psize = 0;
+	mulScalarT4(thread,threads,ulegs,g_p,g_u,T4(thread,threads,0,g_v),&psize);
+
+	// Rows 1 .. vlegs-1: g_p[row .. row+ulegs-1] += g_u * g_v[row];
+	// the carry becomes element ulegs + row.
+#pragma unroll
+	for (uint32_t row = 1; row < vlegs; row++) {
+		T4_store(thread,threads,ulegs+row,g_p,addmul_1gT4 (thread, threads, g_p ,row , g_u, ulegs,T4(thread,threads,row,g_v)));
+	}
+    }
+}
+
+/*
+ * gpuErrchk(call): evaluates a CUDA runtime call and reports a failure
+ * together with the source location of the call.
+ */
+#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
+
+/*
+ * Prints the CUDA error string with file and line to stderr when `code` is
+ * not cudaSuccess and, unless `abort` is false, exits the process with the
+ * error code.
+ *
+ * `file` is a pointer to const because it receives __FILE__, a string
+ * literal.
+ */
+inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
+{
+   if (code != cudaSuccess) 
+   {
+      fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
+      if (abort) exit(code);
+   }
+}
+
+
+/*
+ * Host launcher for gpu_mul: 512 threads per block, grid rounded up so every
+ * one of the `threads` work items is covered, no dynamic shared memory.
+ * thr_id and order are not used and the launch is not synchronised here.
+ */
+__host__ void cpu_mul(int thr_id, int threads, uint32_t alegs, uint32_t blegs, uint64_t *g_a, uint64_t *g_b, uint64_t *g_p,int order)
+{
+	// Original author's note: aligned with the mix table size, do not change.
+	const int perBlock = 512;
+
+	// Number of thread blocks needed, rounded up.
+	const int blockCount = (threads + perBlock - 1) / perBlock;
+
+	const dim3 grid(blockCount);
+	const dim3 block(perBlock);
+	const size_t shared_size = 0;
+
+	gpu_mul<<<grid, block, shared_size>>>(threads, alegs, blegs, g_a, g_b, g_p);
+}
+
+/*
+ * Host launcher for gpu_mulT4: 256 threads per block, no dynamic shared
+ * memory. The operands are passed in swapped order (b first); gpu_mulT4
+ * reorders them by limb count itself.
+ *
+ * The grid is the rounded-up number of blocks needed for `threads` work
+ * items. The previous expression doubled that count; the surplus blocks did
+ * nothing because gpu_mulT4 returns for indices >= threads.
+ *
+ * thr_id and order are not used and the launch is not synchronised here.
+ */
+__host__ void cpu_mulT4(int thr_id, int threads, uint32_t alegs, uint32_t blegs, uint64_t *g_a, uint64_t *g_b, uint64_t *g_p, int order)
+{
+
+	const int threadsperblock = 256; 
+
+	dim3 grid((threads + threadsperblock-1)/threadsperblock);
+	dim3 block(threadsperblock);
+
+	size_t shared_size =0;
+  	
+	gpu_mulT4<<<grid, block, shared_size>>>(threads, blegs, alegs, g_b, g_a, g_p) ;
+}
+
+/* Initialisation hook for the multiplication kernels; currently does nothing. */
+__host__ void mul_init()
+{
+}
diff --git a/x13/cuda_ripemd160.cu b/x13/cuda_ripemd160.cu
new file mode 100644
index 0000000000..eaa2b2390f
--- /dev/null
+++ b/x13/cuda_ripemd160.cu
@@ -0,0 +1,400 @@
+/*
+ * ripemd-160 djm34
+ * 
+ */
+
+/*
+ * ripemd-160 kernel implementation.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2014  djm34
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author   phm <phm@inbox.com>
+ */
+#include <cuda.h>
+#include "cuda_runtime.h"
+#include "device_launch_parameters.h"
+
+
+#include <stdio.h>
+#include <stdint.h>
+#include <memory.h>
+
+
+
+#include "cuda_helper.h"
+
+// Typed integer constants: append the ULL / U suffix and cast to uint64_t / uint32_t.
+#define SPH_C64(x)    ((uint64_t)(x ## ULL))
+#define SPH_C32(x)    ((uint32_t)(x ## U))
+// Masks a value to its low 32 bits.
+#define SPH_T32(x)    ((x) & SPH_C32(0xFFFFFFFF))
+// Alias for SPH_ROTL32, which is not defined in this file
+// (presumably a 32-bit rotate-left from cuda_helper.h - confirm).
+#define ROTL    SPH_ROTL32
+
+// from heavy.cu
+extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
+
+
+ __constant__ uint32_t c_PaddedMessage80[32]; // padded message (80 bytes + padding)
+// Device-constant five-word state arrays; how they are filled is not visible
+// in this part of the file (gpu_IV presumably receives a copy of IV - confirm).
+static __constant__ uint32_t gpu_IV[5];
+static __constant__ uint32_t bufo[5];
+// Host-side copy of the five 32-bit initial chaining values.
+static const uint32_t IV[5] = {
+	SPH_C32(0x67452301), SPH_C32(0xEFCDAB89), SPH_C32(0x98BADCFE),
+	SPH_C32(0x10325476), SPH_C32(0xC3D2E1F0)
+};
+
+/*
+ * Round functions for RIPEMD-128 and RIPEMD-160.
+ * All five are bitwise functions of three 32-bit words:
+ *   F1: x ^ y ^ z
+ *   F2: selects y where x has a 1 bit, z elsewhere
+ *   F3: (x | ~y) ^ z
+ *   F4: selects x where z has a 1 bit, y elsewhere
+ *   F5: x ^ (y | ~z)
+ */
+#define F1(x, y, z)   ((x) ^ (y) ^ (z))
+#define F2(x, y, z)   ((((y) ^ (z)) & (x)) ^ (z))
+#define F3(x, y, z)   (((x) | ~(y)) ^ (z))
+#define F4(x, y, z)   ((((x) ^ (y)) & (z)) ^ (y))
+#define F5(x, y, z)   ((x) ^ ((y) | ~(z)))
+
+/*
+ * Round constants for RIPEMD-160.
+ * K1n is selected by ROUND1(..., n) and K2n by ROUND2(..., n) through token
+ * pasting (K1 ## k / K2 ## k).
+ */
+#define K11    SPH_C32(0x00000000)
+#define K12    SPH_C32(0x5A827999)
+#define K13    SPH_C32(0x6ED9EBA1)
+#define K14    SPH_C32(0x8F1BBCDC)
+#define K15    SPH_C32(0xA953FD4E)
+
+#define K21    SPH_C32(0x50A28BE6)
+#define K22    SPH_C32(0x5C4DD124)
+#define K23    SPH_C32(0x6D703EF3)
+#define K24    SPH_C32(0x7A6D76E9)
+#define K25    SPH_C32(0x00000000)
+
+/*
+ * One step on five working variables:
+ *   a = ROTL(a + f(b, c, d) + r + k, s) + e   (each sum masked to 32 bits)
+ *   c = ROTL(c, 10)
+ * f is one of F1..F5, s the rotation amount, r the message word and k the
+ * round constant.
+ */
+#define RR(a, b, c, d, e, f, s, r, k)    { \
+		a = SPH_T32(ROTL(SPH_T32(a + f(b, c, d) + r + k), s) + e); \
+		c = ROTL(c, 10); \
+	} 
+
+/* RR on the variables with suffix 1 (A1..E1), using constant K1<k>. */
+#define ROUND1(a, b, c, d, e, f, s, r, k)  \
+	RR(a ## 1, b ## 1, c ## 1, d ## 1, e ## 1, f, s, r, K1 ## k)
+
+/* RR on the variables with suffix 2 (A2..E2), using constant K2<k>. */
+#define ROUND2(a, b, c, d, e, f, s, r, k)  \
+	RR(a ## 2, b ## 2, c ## 2, d ## 2, e ## 2, f, s, r, K2 ## k)
+
+
+
+#define RIPEMD160_ROUND_BODY(in, h)   { \
+		uint32_t A1, B1, C1, D1, E1; \
+		uint32_t A2, B2, C2, D2, E2; \
+		uint32_t tmp; \
+ \
+		A1 = A2 = (h)[0]; \
+		B1 = B2 = (h)[1]; \
+		C1 = C2 = (h)[2]; \
+		D1 = D2 = (h)[3]; \
+		E1 = E2 = (h)[4]; \
+ \
+		ROUND1(A, B, C, D, E, F1, 11, in[ 0],  1); \
+		ROUND1(E, A, B, C, D, F1, 14, in[ 1],  1); \
+		ROUND1(D, E, A, B, C, F1, 15, in[ 2],  1); \
+		ROUND1(C, D, E, A, B, F1, 12, in[ 3],  1); \
+		ROUND1(B, C, D, E, A, F1,  5, in[ 4],  1); \
+		ROUND1(A, B, C, D, E, F1,  8, in[ 5],  1); \
+		ROUND1(E, A, B, C, D, F1,  7, in[ 6],  1); \
+		ROUND1(D, E, A, B, C, F1,  9, in[ 7],  1); \
+		ROUND1(C, D, E, A, B, F1, 11, in[ 8],  1); \
+		ROUND1(B, C, D, E, A, F1, 13, in[ 9],  1); \
+		ROUND1(A, B, C, D, E, F1, 14, in[10],  1); \
+		ROUND1(E, A, B, C, D, F1, 15, in[11],  1); \
+		ROUND1(D, E, A, B, C, F1,  6, in[12],  1); \
+		ROUND1(C, D, E, A, B, F1,  7, in[13],  1); \
+		ROUND1(B, C, D, E, A, F1,  9, in[14],  1); \
+		ROUND1(A, B, C, D, E, F1,  8, in[15],  1); \
+ \
+		ROUND1(E, A, B, C, D, F2,  7, in[ 7],  2); \
+		ROUND1(D, E, A, B, C, F2,  6, in[ 4],  2); \
+		ROUND1(C, D, E, A, B, F2,  8, in[13],  2); \
+		ROUND1(B, C, D, E, A, F2, 13, in[ 1],  2); \
+		ROUND1(A, B, C, D, E, F2, 11, in[10],  2); \
+		ROUND1(E, A, B, C, D, F2,  9, in[ 6],  2); \
+		ROUND1(D, E, A, B, C, F2,  7, in[15],  2); \
+		ROUND1(C, D, E, A, B, F2, 15, in[ 3],  2); \
+		ROUND1(B, C, D, E, A, F2,  7, in[12],  2); \
+		ROUND1(A, B, C, D, E, F2, 12, in[ 0],  2); \
+		ROUND1(E, A, B, C, D, F2, 15, in[ 9],  2); \
+		ROUND1(D, E, A, B, C, F2,  9, in[ 5],  2); \
+		ROUND1(C, D, E, A, B, F2, 11, in[ 2],  2); \
+		ROUND1(B, C, D, E, A, F2,  7, in[14],  2); \
+		ROUND1(A, B, C, D, E, F2, 13, in[11],  2); \
+		ROUND1(E, A, B, C, D, F2, 12, in[ 8],  2); \
+ \
+		ROUND1(D, E, A, B, C, F3, 11, in[ 3],  3); \
+		ROUND1(C, D, E, A, B, F3, 13, in[10],  3); \
+		ROUND1(B, C, D, E, A, F3,  6, in[14],  3); \
+		ROUND1(A, B, C, D, E, F3,  7, in[ 4],  3); \
+		ROUND1(E, A, B, C, D, F3, 14, in[ 9],  3); \
+		ROUND1(D, E, A, B, C, F3,  9, in[15],  3); \
+		ROUND1(C, D, E, A, B, F3, 13, in[ 8],  3); \
+		ROUND1(B, C, D, E, A, F3, 15, in[ 1],  3); \
+		ROUND1(A, B, C, D, E, F3, 14, in[ 2],  3); \
+		ROUND1(E, A, B, C, D, F3,  8, in[ 7],  3); \
+		ROUND1(D, E, A, B, C, F3, 13, in[ 0],  3); \
+		ROUND1(C, D, E, A, B, F3,  6, in[ 6],  3); \
+		ROUND1(B, C, D, E, A, F3,  5, in[13],  3); \
+		ROUND1(A, B, C, D, E, F3, 12, in[11],  3); \
+		ROUND1(E, A, B, C, D, F3,  7, in[ 5],  3); \
+		ROUND1(D, E, A, B, C, F3,  5, in[12],  3); \
+ \
+		ROUND1(C, D, E, A, B, F4, 11, in[ 1],  4); \
+		ROUND1(B, C, D, E, A, F4, 12, in[ 9],  4); \
+		ROUND1(A, B, C, D, E, F4, 14, in[11],  4); \
+		ROUND1(E, A, B, C, D, F4, 15, in[10],  4); \
+		ROUND1(D, E, A, B, C, F4, 14, in[ 0],  4); \
+		ROUND1(C, D, E, A, B, F4, 15, in[ 8],  4); \
+		ROUND1(B, C, D, E, A, F4,  9, in[12],  4); \
+		ROUND1(A, B, C, D, E, F4,  8, in[ 4],  4); \
+		ROUND1(E, A, B, C, D, F4,  9, in[13],  4); \
+		ROUND1(D, E, A, B, C, F4, 14, in[ 3],  4); \
+		ROUND1(C, D, E, A, B, F4,  5, in[ 7],  4); \
+		ROUND1(B, C, D, E, A, F4,  6, in[15],  4); \
+		ROUND1(A, B, C, D, E, F4,  8, in[14],  4); \
+		ROUND1(E, A, B, C, D, F4,  6, in[ 5],  4); \
+		ROUND1(D, E, A, B, C, F4,  5, in[ 6],  4); \
+		ROUND1(C, D, E, A, B, F4, 12, in[ 2],  4); \
+ \
+		ROUND1(B, C, D, E, A, F5,  9, in[ 4],  5); \
+		ROUND1(A, B, C, D, E, F5, 15, in[ 0],  5); \
+		ROUND1(E, A, B, C, D, F5,  5, in[ 5],  5); \
+		ROUND1(D, E, A, B, C, F5, 11, in[ 9],  5); \
+		ROUND1(C, D, E, A, B, F5,  6, in[ 7],  5); \
+		ROUND1(B, C, D, E, A, F5,  8, in[12],  5); \
+		ROUND1(A, B, C, D, E, F5, 13, in[ 2],  5); \
+		ROUND1(E, A, B, C, D, F5, 12, in[10],  5); \
+		ROUND1(D, E, A, B, C, F5,  5, in[14],  5); \
+		ROUND1(C, D, E, A, B, F5, 12, in[ 1],  5); \
+		ROUND1(B, C, D, E, A, F5, 13, in[ 3],  5); \
+		ROUND1(A, B, C, D, E, F5, 14, in[ 8],  5); \
+		ROUND1(E, A, B, C, D, F5, 11, in[11],  5); \
+		ROUND1(D, E, A, B, C, F5,  8, in[ 6],  5); \
+		ROUND1(C, D, E, A, B, F5,  5, in[15],  5); \
+		ROUND1(B, C, D, E, A, F5,  6, in[13],  5); \
+ \
+		ROUND2(A, B, C, D, E, F5,  8, in[ 5],  1); \
+		ROUND2(E, A, B, C, D, F5,  9, in[14],  1); \
+		ROUND2(D, E, A, B, C, F5,  9, in[ 7],  1); \
+		ROUND2(C, D, E, A, B, F5, 11, in[ 0],  1); \
+		ROUND2(B, C, D, E, A, F5, 13, in[ 9],  1); \
+		ROUND2(A, B, C, D, E, F5, 15, in[ 2],  1); \
+		ROUND2(E, A, B, C, D, F5, 15, in[11],  1); \
+		ROUND2(D, E, A, B, C, F5,  5, in[ 4],  1); \
+		ROUND2(C, D, E, A, B, F5,  7, in[13],  1); \
+		ROUND2(B, C, D, E, A, F5,  7, in[ 6],  1); \
+		ROUND2(A, B, C, D, E, F5,  8, in[15],  1); \
+		ROUND2(E, A, B, C, D, F5, 11, in[ 8],  1); \
+		ROUND2(D, E, A, B, C, F5, 14, in[ 1],  1); \
+		ROUND2(C, D, E, A, B, F5, 14, in[10],  1); \
+		ROUND2(B, C, D, E, A, F5, 12, in[ 3],  1); \
+		ROUND2(A, B, C, D, E, F5,  6, in[12],  1); \
+ \
+		ROUND2(E, A, B, C, D, F4,  9, in[ 6],  2); \
+		ROUND2(D, E, A, B, C, F4, 13, in[11],  2); \
+		ROUND2(C, D, E, A, B, F4, 15, in[ 3],  2); \
+		ROUND2(B, C, D, E, A, F4,  7, in[ 7],  2); \
+		ROUND2(A, B, C, D, E, F4, 12, in[ 0],  2); \
+		ROUND2(E, A, B, C, D, F4,  8, in[13],  2); \
+		ROUND2(D, E, A, B, C, F4,  9, in[ 5],  2); \
+		ROUND2(C, D, E, A, B, F4, 11, in[10],  2); \
+		ROUND2(B, C, D, E, A, F4,  7, in[14],  2); \
+		ROUND2(A, B, C, D, E, F4,  7, in[15],  2); \
+		ROUND2(E, A, B, C, D, F4, 12, in[ 8],  2); \
+		ROUND2(D, E, A, B, C, F4,  7, in[12],  2); \
+		ROUND2(C, D, E, A, B, F4,  6, in[ 4],  2); \
+		ROUND2(B, C, D, E, A, F4, 15, in[ 9],  2); \
+		ROUND2(A, B, C, D, E, F4, 13, in[ 1],  2); \
+		ROUND2(E, A, B, C, D, F4, 11, in[ 2],  2); \
+ \
+		ROUND2(D, E, A, B, C, F3,  9, in[15],  3); \
+		ROUND2(C, D, E, A, B, F3,  7, in[ 5],  3); \
+		ROUND2(B, C, D, E, A, F3, 15, in[ 1],  3); \
+		ROUND2(A, B, C, D, E, F3, 11, in[ 3],  3); \
+		ROUND2(E, A, B, C, D, F3,  8, in[ 7],  3); \
+		ROUND2(D, E, A, B, C, F3,  6, in[14],  3); \
+		ROUND2(C, D, E, A, B, F3,  6, in[ 6],  3); \
+		ROUND2(B, C, D, E, A, F3, 14, in[ 9],  3); \
+		ROUND2(A, B, C, D, E, F3, 12, in[11],  3); \
+		ROUND2(E, A, B, C, D, F3, 13, in[ 8],  3); \
+		ROUND2(D, E, A, B, C, F3,  5, in[12],  3); \
+		ROUND2(C, D, E, A, B, F3, 14, in[ 2],  3); \
+		ROUND2(B, C, D, E, A, F3, 13, in[10],  3); \
+		ROUND2(A, B, C, D, E, F3, 13, in[ 0],  3); \
+		ROUND2(E, A, B, C, D, F3,  7, in[ 4],  3); \
+		ROUND2(D, E, A, B, C, F3,  5, in[13],  3); \
+ \
+		ROUND2(C, D, E, A, B, F2, 15, in[ 8],  4); \
+		ROUND2(B, C, D, E, A, F2,  5, in[ 6],  4); \
+		ROUND2(A, B, C, D, E, F2,  8, in[ 4],  4); \
+		ROUND2(E, A, B, C, D, F2, 11, in[ 1],  4); \
+		ROUND2(D, E, A, B, C, F2, 14, in[ 3],  4); \
+		ROUND2(C, D, E, A, B, F2, 14, in[11],  4); \
+		ROUND2(B, C, D, E, A, F2,  6, in[15],  4); \
+		ROUND2(A, B, C, D, E, F2, 14, in[ 0],  4); \
+		ROUND2(E, A, B, C, D, F2,  6, in[ 5],  4); \
+		ROUND2(D, E, A, B, C, F2,  9, in[12],  4); \
+		ROUND2(C, D, E, A, B, F2, 12, in[ 2],  4); \
+		ROUND2(B, C, D, E, A, F2,  9, in[13],  4); \
+		ROUND2(A, B, C, D, E, F2, 12, in[ 9],  4); \
+		ROUND2(E, A, B, C, D, F2,  5, in[ 7],  4); \
+		ROUND2(D, E, A, B, C, F2, 15, in[10],  4); \
+		ROUND2(C, D, E, A, B, F2,  8, in[14],  4); \
+ \
+		ROUND2(B, C, D, E, A, F1,  8, in[12],  5); \
+		ROUND2(A, B, C, D, E, F1,  5, in[15],  5); \
+		ROUND2(E, A, B, C, D, F1, 12, in[10],  5); \
+		ROUND2(D, E, A, B, C, F1,  9, in[ 4],  5); \
+		ROUND2(C, D, E, A, B, F1, 12, in[ 1],  5); \
+		ROUND2(B, C, D, E, A, F1,  5, in[ 5],  5); \
+		ROUND2(A, B, C, D, E, F1, 14, in[ 8],  5); \
+		ROUND2(E, A, B, C, D, F1,  6, in[ 7],  5); \
+		ROUND2(D, E, A, B, C, F1,  8, in[ 6],  5); \
+		ROUND2(C, D, E, A, B, F1, 13, in[ 2],  5); \
+		ROUND2(B, C, D, E, A, F1,  6, in[13],  5); \
+		ROUND2(A, B, C, D, E, F1,  5, in[14],  5); \
+		ROUND2(E, A, B, C, D, F1, 15, in[ 0],  5); \
+		ROUND2(D, E, A, B, C, F1, 13, in[ 3],  5); \
+		ROUND2(C, D, E, A, B, F1, 11, in[ 9],  5); \
+		ROUND2(B, C, D, E, A, F1, 11, in[11],  5); \
+ \
+		tmp = SPH_T32((h)[1] + C1 + D2); \
+		(h)[1] = SPH_T32((h)[2] + D1 + E2); \
+		(h)[2] = SPH_T32((h)[3] + E1 + A2); \
+		(h)[3] = SPH_T32((h)[4] + A1 + B2); \
+		(h)[4] = SPH_T32((h)[0] + B1 + C2); \
+		(h)[0] = tmp; \
+	} 
+// RIPEMD-160 over the 122-byte input, one nonce per thread (startNounce + thread). Starts from the state in bufo
+// (uploaded by ripemd160_setBlock_120) and stores 3 uint64 words per thread at outputHash[i*threads+thread].
+__global__ void m7_ripemd160_gpu_hash_120(int threads, uint32_t startNounce, uint64_t *outputHash)
+{
+    // Flat 1-D thread index; threads at or beyond `threads` do nothing.
+    int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+    if (thread < threads)
+    {
+        // Nonce candidate handled by this thread.
+        uint32_t nounce = startNounce + thread ;
+union {
+uint8_t h1[64];
+uint32_t h4[16];
+uint64_t h8[8];
+} hash;  
+// Rebind the F1..F5 round functions to helper calls for the RIPEMD160_ROUND_BODY expansions below.
+#undef F1
+#undef F2
+#undef F3
+#undef F4
+#undef F5
+// NOTE(review): xor3/xandx/xornot64/xornt64 are not defined in the visible part of this file - confirm their source.
+#define F1(x, y, z)   xor3(x,y,z)
+#define F2(x, y, z)   xandx(x,y,z)
+#define F3(x, y, z)   xornot64(x,y,z)
+#define F4(x, y, z)   xandx(z,x,y)
+#define F5(x, y, z)   xornt64(x,y,z)
+        uint32_t in2[16],in3[16];
+        uint32_t in[16],buf[5]; // NOTE(review): in[] is never assigned here - confirm the macro does not read it by name
+        #pragma unroll 16
+        for (int i=0;i<16;i++) {if ((i+16)<29)  {in2[i]= c_PaddedMessage80[i+16];} // words 16..28 copied unchanged
+						   else if ((i+16)==29) {in2[i]= nounce;} // word 29 is replaced by the nonce
+						   else if ((i+16)==30) {in2[i]= c_PaddedMessage80[i+16];} // word 30 copied unchanged
+						   else                 {in2[i]= 0;}} // word 31 is zero
+		#pragma unroll 16
+		for (int i=0;i<16;i++) {in3[i]=0;}
+		                        in3[14]=0x3d0; // 0x3d0 = 976 = 122 * 8
+         #pragma unroll 5
+		 for (int i=0;i<5;i++) {buf[i]=bufo[i];}
+		 RIPEMD160_ROUND_BODY(in2, buf);		 
+         RIPEMD160_ROUND_BODY(in3, buf);
+// Pack the five 32-bit state words into hash; word 5 is cleared first so that the upper
+// half of hash.h8[2] is zero when the three 64-bit words are written out.
+hash.h4[5]=0; 
+#pragma unroll 5
+for (int i=0;i<5;i++) 
+{hash.h4[i]=buf[i];
+}
+// Output layout: word i of every thread is stored contiguously (index i*threads+thread).
+#pragma unroll 3
+for (int i=0;i<3;i++) {outputHash[i*threads+thread]=hash.h8[i];}
+
+ }
+}
+
+// Copies the host-side IV table (presumably the RIPEMD-160 initial state - confirm) into the
+// device symbol gpu_IV. The thr_id and threads arguments are not used by this function.
+void ripemd160_cpu_init(int thr_id, int threads)
+{
+    const size_t ivBytes = sizeof(IV);
+    cudaMemcpyToSymbol(gpu_IV, IV, ivBytes, 0, cudaMemcpyHostToDevice);
+}
+// Host-side setup for the 122-byte input at pdata: uploads the padded 128-byte message and the state after its first 64 bytes.
+__host__ void ripemd160_setBlock_120(void *pdata)
+{
+	unsigned char PaddedMessage[128];
+	uint8_t ending =0x80;
+	memcpy(PaddedMessage, pdata, 122);
+	memset(PaddedMessage+122,ending,1); 
+	memset(PaddedMessage+123, 0, 5); // not optional: all 128 bytes are uploaded below and the kernel reads word 30 (bytes 120..123)
+	cudaMemcpyToSymbol( c_PaddedMessage80, PaddedMessage, 32*sizeof(uint32_t), 0, cudaMemcpyHostToDevice);
+// Rebind F1..F5 to plain bitwise expressions for the host-side RIPEMD160_ROUND_BODY expansion below.
+#undef F1
+#undef F2
+#undef F3
+#undef F4
+#undef F5
+#define F1(x, y, z)   ((x) ^ (y) ^ (z))
+#define F2(x, y, z)   ((((y) ^ (z)) & (x)) ^ (z))
+#define F3(x, y, z)   (((x) | ~(y)) ^ (z))
+#define F4(x, y, z)   ((((x) ^ (y)) & (z)) ^ (y))
+#define F5(x, y, z)   ((x) ^ ((y) | ~(z)))	
+	uint32_t* alt_data =(uint32_t*)pdata;
+        uint32_t in[16],buf[5];
+
+	    // First 16 words (64 bytes) of the input form the first message block.
+		for (int i=0;i<16;i++) {in[i]= alt_data[i];}
+        
+		// Start from the host IV table.
+		for (int i=0;i<5;i++) {buf[i]=IV[i];}
+		
+		 RIPEMD160_ROUND_BODY(in, buf); // hash the first block once on the host instead of once per GPU thread
+	cudaMemcpyToSymbol(bufo, buf, 5*sizeof(uint32_t), 0, cudaMemcpyHostToDevice);
+}
+
+// Launches m7_ripemd160_gpu_hash_120 on the default stream for `threads` nonces starting at startNounce,
+// then waits via MyStreamSynchronize(NULL, order, thr_id). The kernel writes 3 uint64 words per nonce to d_outputHash.
+__host__ void m7_ripemd160_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order)
+{
+	const int threadsperblock = 256; // original note: "alignment with mixtab size - do not change"
+
+	// Round the block count up: plain threads/threadsperblock dropped the trailing partial block and
+	// produced an empty grid for threads < 256. The kernel ignores threads whose index is >= threads.
+	dim3 grid((threads + threadsperblock - 1) / threadsperblock);
+	dim3 block(threadsperblock);
+
+	m7_ripemd160_gpu_hash_120<<<grid, block, 0>>>(threads, startNounce, d_outputHash);
+
+	MyStreamSynchronize(NULL, order, thr_id);
+}
diff --git a/x13/cuda_sha512.cu b/x13/cuda_sha512.cu
new file mode 100644
index 0000000000..be9276e96c
--- /dev/null
+++ b/x13/cuda_sha512.cu
@@ -0,0 +1,419 @@
+/*
+ * sha512 djm34
+ * 
+ */
+
+/*
+ * sha-512 kernel implementation.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2014  djm34
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author   phm <phm@inbox.com>
+ */
+#include <cuda.h>
+#include "cuda_runtime.h"
+#include "device_launch_parameters.h"
+ 
+
+#include <stdio.h>
+#include <stdint.h>
+#include <memory.h>
+
+
+#define USE_SHARED 1
+#include "cuda_helper.h"
+#define SPH_C64(x)    ((uint64_t)(x ## ULL))
+
+
+// from heavy.cu
+extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
+extern int device_major[8];
+
+
+__constant__ uint64_t c_PaddedMessage80[16]; // 128-byte padded first block, uploaded by sha512_setBlock_120
+static __constant__ uint64_t H_512[8]; // device copy of H512, uploaded by sha512_cpu_init
+static __constant__ uint64_t gpu_WK[80]; // W[i]+K512[i] table for the second block, uploaded by sha512_cpu_init
+static __constant__ uint64_t gpu_W[80]; // NOTE(review): not referenced anywhere else in this file
+// Host copy of the eight 64-bit SHA-512 initial hash words; copied to H_512 by sha512_cpu_init.
+static const uint64_t H512[8] = {
+	SPH_C64(0x6A09E667F3BCC908), SPH_C64(0xBB67AE8584CAA73B),
+	SPH_C64(0x3C6EF372FE94F82B), SPH_C64(0xA54FF53A5F1D36F1),
+	SPH_C64(0x510E527FADE682D1), SPH_C64(0x9B05688C2B3E6C1F),
+	SPH_C64(0x1F83D9ABFB41BD6B), SPH_C64(0x5BE0CD19137E2179)
+};
+static __constant__ uint64_t K_512[80]; // device copy of K512, uploaded by sha512_cpu_init
+// Host copy of the 80 per-round constants; copied to K_512 and folded into gpu_WK by sha512_cpu_init.
+static const uint64_t K512[80] = {
+	SPH_C64(0x428A2F98D728AE22), SPH_C64(0x7137449123EF65CD),
+	SPH_C64(0xB5C0FBCFEC4D3B2F), SPH_C64(0xE9B5DBA58189DBBC),
+	SPH_C64(0x3956C25BF348B538), SPH_C64(0x59F111F1B605D019),
+	SPH_C64(0x923F82A4AF194F9B), SPH_C64(0xAB1C5ED5DA6D8118),
+	SPH_C64(0xD807AA98A3030242), SPH_C64(0x12835B0145706FBE),
+	SPH_C64(0x243185BE4EE4B28C), SPH_C64(0x550C7DC3D5FFB4E2),
+	SPH_C64(0x72BE5D74F27B896F), SPH_C64(0x80DEB1FE3B1696B1),
+	SPH_C64(0x9BDC06A725C71235), SPH_C64(0xC19BF174CF692694),
+	SPH_C64(0xE49B69C19EF14AD2), SPH_C64(0xEFBE4786384F25E3),
+	SPH_C64(0x0FC19DC68B8CD5B5), SPH_C64(0x240CA1CC77AC9C65),
+	SPH_C64(0x2DE92C6F592B0275), SPH_C64(0x4A7484AA6EA6E483),
+	SPH_C64(0x5CB0A9DCBD41FBD4), SPH_C64(0x76F988DA831153B5),
+	SPH_C64(0x983E5152EE66DFAB), SPH_C64(0xA831C66D2DB43210),
+	SPH_C64(0xB00327C898FB213F), SPH_C64(0xBF597FC7BEEF0EE4),
+	SPH_C64(0xC6E00BF33DA88FC2), SPH_C64(0xD5A79147930AA725),
+	SPH_C64(0x06CA6351E003826F), SPH_C64(0x142929670A0E6E70),
+	SPH_C64(0x27B70A8546D22FFC), SPH_C64(0x2E1B21385C26C926),
+	SPH_C64(0x4D2C6DFC5AC42AED), SPH_C64(0x53380D139D95B3DF),
+	SPH_C64(0x650A73548BAF63DE), SPH_C64(0x766A0ABB3C77B2A8),
+	SPH_C64(0x81C2C92E47EDAEE6), SPH_C64(0x92722C851482353B),
+	SPH_C64(0xA2BFE8A14CF10364), SPH_C64(0xA81A664BBC423001),
+	SPH_C64(0xC24B8B70D0F89791), SPH_C64(0xC76C51A30654BE30),
+	SPH_C64(0xD192E819D6EF5218), SPH_C64(0xD69906245565A910),
+	SPH_C64(0xF40E35855771202A), SPH_C64(0x106AA07032BBD1B8),
+	SPH_C64(0x19A4C116B8D2D0C8), SPH_C64(0x1E376C085141AB53),
+	SPH_C64(0x2748774CDF8EEB99), SPH_C64(0x34B0BCB5E19B48A8),
+	SPH_C64(0x391C0CB3C5C95A63), SPH_C64(0x4ED8AA4AE3418ACB),
+	SPH_C64(0x5B9CCA4F7763E373), SPH_C64(0x682E6FF3D6B2B8A3),
+	SPH_C64(0x748F82EE5DEFB2FC), SPH_C64(0x78A5636F43172F60),
+	SPH_C64(0x84C87814A1F0AB72), SPH_C64(0x8CC702081A6439EC),
+	SPH_C64(0x90BEFFFA23631E28), SPH_C64(0xA4506CEBDE82BDE9),
+	SPH_C64(0xBEF9A3F7B2C67915), SPH_C64(0xC67178F2E372532B),
+	SPH_C64(0xCA273ECEEA26619C), SPH_C64(0xD186B8C721C0C207),
+	SPH_C64(0xEADA7DD6CDE0EB1E), SPH_C64(0xF57D4F7FEE6ED178),
+	SPH_C64(0x06F067AA72176FBA), SPH_C64(0x0A637DC5A2C898A6),
+	SPH_C64(0x113F9804BEF90DAE), SPH_C64(0x1B710B35131C471B),
+	SPH_C64(0x28DB77F523047D84), SPH_C64(0x32CAAB7B40C72493),
+	SPH_C64(0x3C9EBE0A15C9BEBC), SPH_C64(0x431D67C49C100D4C),
+	SPH_C64(0x4CC5D4BECB3E42B6), SPH_C64(0x597F299CFC657E2A),
+	SPH_C64(0x5FCB6FAB3AD6FAEC), SPH_C64(0x6C44198C4A475817)
+};
+
+
+// Combines ROTR64(x,28), ROTR64(x,34) and ROTR64(x,39) with xor3; applied to `a` in the round steps.
+static __device__ __forceinline__ uint64_t bsg5_0(uint64_t x)
+{
+	const uint64_t rot28 = ROTR64(x, 28);
+	const uint64_t rot34 = ROTR64(x, 34);
+	return xor3(rot28, rot34, ROTR64(x, 39));
+}
+// Combines ROTR64(x,14), ROTR64(x,18) and ROTR64(x,41) with xor3; applied to `e` in the round steps.
+static __device__ __forceinline__ uint64_t bsg5_1(uint64_t x)
+{
+	const uint64_t rot14 = ROTR64(x, 14);
+	const uint64_t rot18 = ROTR64(x, 18);
+	return xor3(rot14, rot18, ROTR64(x, 41));
+}
+// Combines ROTR64(x,1), ROTR64(x,8) and shr_t64(x,7) with xor3; used on W[i-15] in the schedule expansion.
+static __device__ __forceinline__ uint64_t ssg5_0(uint64_t x)
+{
+	const uint64_t rot1 = ROTR64(x, 1);
+	const uint64_t rot8 = ROTR64(x, 8);
+	return xor3(rot1, rot8, shr_t64(x, 7));
+}
+// Combines ROTR64(x,19), ROTR64(x,61) and shr_t64(x,6) with xor3; used on W[i-2] in the schedule expansion.
+static __device__ __forceinline__ uint64_t ssg5_1(uint64_t x)
+{
+	const uint64_t rot19 = ROTR64(x, 19);
+	const uint64_t rot61 = ROTR64(x, 61);
+	return xor3(rot19, rot61, shr_t64(x, 6));
+}
+
+// One round step on the 8-word state r, using W[i] + K[i]. The roles a..h are mapped onto r by
+// rotation: role k lives at r[(k + 8 - ord) & 7], so only the d and h slots are rewritten.
+static __device__ __forceinline__ void sha3_step2(uint64_t* r,uint64_t* W,uint64_t* K,int ord,int i) 
+{
+	const int base = 8 - ord;
+	const uint64_t va = r[(base + 0) & 7];
+	const uint64_t vb = r[(base + 1) & 7];
+	const uint64_t vc = r[(base + 2) & 7];
+	const uint64_t ve = r[(base + 4) & 7];
+	const uint64_t vf = r[(base + 5) & 7];
+	const uint64_t vg = r[(base + 6) & 7];
+	const int dIdx = (base + 3) & 7;
+	const int hIdx = (base + 7) & 7;
+
+	// t1 consumes the old h; the d slot becomes d + t1 and the h slot becomes t1 + t2.
+	const uint64_t t1 = r[hIdx] + bsg5_1(ve) + xandx64(ve, vf, vg) + W[i] + K[i];
+	const uint64_t t2 = bsg5_0(va) + andor(va, vb, vc);
+	r[dIdx] += t1;
+	r[hIdx] = t1 + t2;
+}
+// Same round step as sha3_step2, but W[i] is taken as-is with no separate K[i] term
+// (the m7 kernels pass the precomputed W+K table here). Role k lives at r[(k + 8 - ord) & 7].
+static __device__ __forceinline__ void sha3_step3(uint64_t* r,uint64_t* W,int ord,int i) 
+{
+	const int base = 8 - ord;
+	const uint64_t va = r[(base + 0) & 7];
+	const uint64_t vb = r[(base + 1) & 7];
+	const uint64_t vc = r[(base + 2) & 7];
+	const uint64_t ve = r[(base + 4) & 7];
+	const uint64_t vf = r[(base + 5) & 7];
+	const uint64_t vg = r[(base + 6) & 7];
+	const int dIdx = (base + 3) & 7;
+	const int hIdx = (base + 7) & 7;
+
+	// t1 consumes the old h; the d slot becomes d + t1 and the h slot becomes t1 + t2.
+	const uint64_t t1 = r[hIdx] + bsg5_1(ve) + xandx64(ve, vf, vg) + W[i];
+	const uint64_t t2 = bsg5_0(va) + andor(va, vb, vc);
+	r[dIdx] += t1;
+	r[hIdx] = t1 + t2;
+}
+
+// SHA-512 over one 64-byte hash per thread, in place. Each thread reads the 8 uint64 words at
+// g_hash[8*hashPosition], hashes them as a single padded block and writes the digest back there.
+// g_nonceVector may be NULL, in which case thread t handles nonce startNounce + t.
+__global__ void sha512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector)
+{
+    int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+    if (thread < threads)
+    {
+        uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread);
+        int hashPosition = nounce - startNounce;
+        // Index the hash buffer itself. The previous code cast &g_hash (the address of the kernel
+        // parameter) to uint64_t*, so it never addressed the buffer that g_hash points to.
+        uint64_t *inpHash = &g_hash[8 * hashPosition];
+
+        uint64_t W[80];
+        uint64_t r[8];
+        // Clear words 9..79; W[15] is set below and W[16..79] are overwritten by the expansion.
+#pragma unroll 71
+        for (int i=9;i<80;i++) {W[i]=0;}
+
+        // Message words are byte-swapped on load; r starts from the initial state H_512.
+#pragma unroll 8
+        for (int i = 0; i < 8; i ++) {
+            W[i] = cuda_swab64(inpHash[i]);
+            r[i] = H_512[i];
+        }
+
+        W[8] = 0x8000000000000000;   // padding marker directly after the 64-byte message
+        W[15]= 0x0000000000000200;   // 0x200 = 512 = 64 * 8
+
+        // Message schedule expansion.
+#pragma unroll 64
+        for (int i = 16; i < 80; i ++)
+            W[i] = sph_t64(ssg5_1(W[i - 2]) + W[i - 7] + ssg5_0(W[i - 15]) + W[i - 16]);
+
+        // 80 rounds as 10 groups of 8; ord selects the register rotation inside sha3_step2.
+#if __CUDA_ARCH__ < 500    // go figure...
+#pragma unroll 10
+#endif
+        for (int i = 0; i < 10; i ++) {
+#pragma unroll 8
+            for (int ord=0;ord<8;ord++) {sha3_step2(r,W,K_512,ord,8*i+ord);}
+        }
+        // Add the initial state back and store the digest byte-swapped, in place.
+#pragma unroll 8
+        for (int u = 0; u < 8; u ++)
+            inpHash[u] = cuda_swab64(sph_t64(r[u] + H_512[u]));
+    }
+}
+
+// SHA-512 of the 122-byte input, one nonce per thread, as two 128-byte blocks. This is the variant the host
+// launcher selects when device_major[thr_id] == 5. Requires blockDim.x >= 80, because the first 80 threads
+// stage K_512 and gpu_WK in shared memory. Digest word i of each thread goes to outputHash[i*threads+thread].
+__global__ void __launch_bounds__(256,3) m7_sha512_gpu50_hash_120(int threads, uint32_t startNounce, uint64_t *outputHash)
+{
+	// Stage the round constants and the precomputed W+K table of the second block in shared memory.
+	__shared__ uint64_t K[80];
+	__shared__ uint64_t WK[80];
+	if (threadIdx.x<80)
+	{
+		WK[threadIdx.x] = gpu_WK[threadIdx.x];
+		K[threadIdx.x] =K_512[threadIdx.x];
+	}
+	__syncthreads(); // reached by every thread of the block, before the bounds check below
+
+	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	if (thread < threads)
+	{
+		uint32_t nounce = startNounce + thread;
+		uint64_t W[80];
+		uint64_t r[8];
+#pragma unroll 8
+		for (int i = 0; i < 8; i ++) {r[i] = H_512[i];}
+
+		// First block: c_PaddedMessage80 with the nonce inserted into word 14 via REPLACE_HIWORD.
+#pragma unroll 14
+		for (int i = 0; i < 14; i ++) {W[i] = cuda_swab64(c_PaddedMessage80[i]);}
+		W[14] = cuda_swab64(REPLACE_HIWORD(c_PaddedMessage80[14],nounce));
+		W[15] = cuda_swab64(c_PaddedMessage80[15]);
+
+#pragma unroll 64
+		for (int i = 16; i < 80; i ++)
+			W[i] = sph_t64(ssg5_1(W[i - 2]) + W[i - 7] + ssg5_0(W[i - 15]) + W[i - 16]);
+
+#if __CUDA_ARCH__ < 500    // go figure...
+#pragma unroll 10
+#endif
+		for (int i = 0; i < 10; i ++) {
+#pragma unroll 8
+			for (int ord=0;ord<8;ord++) {sha3_step2(r,W,K,ord,8*i+ord); }
+		}
+
+		// State after the first block; a copy is kept because it is added back after the second block.
+		uint64_t tempr[8];
+#pragma unroll 8
+		for (int i = 0; i < 8; i++) {tempr[i] = r[i] = sph_t64(r[i] + H_512[i]);}
+
+		// Second block: its W[i]+K[i] values do not depend on the nonce and come from WK.
+		// The unroll count used to trail the "#endif" as stray tokens; it now sits on the pragma.
+#if __CUDA_ARCH__ < 500    // go figure...
+#pragma unroll 10
+#endif
+		for (int i = 0; i < 10; i ++) {
+#pragma unroll 8
+			for (int ord=0;ord<8;ord++) {sha3_step3(r,WK,ord,8*i+ord); }
+		}
+#pragma unroll 8
+		for(int i=0;i<8;i++) {outputHash[i*threads+thread] = cuda_swab64(sph_t64(r[i] + tempr[i]));}
+	} /// thread
+}
+// SHA-512 of the 122-byte input, one nonce per thread, as two 128-byte blocks. This is the variant the host
+// launcher selects when device_major[thr_id] != 5. Requires blockDim.x >= 80, because the first 80 threads
+// stage K_512 and gpu_WK in shared memory. Digest word i of each thread goes to outputHash[i*threads+thread].
+__global__ void __launch_bounds__(256,4) m7_sha512_gpu_hash_120(int threads, uint32_t startNounce, uint64_t *outputHash)
+{
+	// Stage the round constants and the precomputed W+K table of the second block in shared memory.
+	__shared__ uint64_t K[80];
+	__shared__ uint64_t WK[80];
+	if (threadIdx.x<80)
+	{
+		WK[threadIdx.x] = gpu_WK[threadIdx.x];
+		K[threadIdx.x] =K_512[threadIdx.x];
+	}
+	__syncthreads(); // reached by every thread of the block, before the bounds check below
+
+	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	if (thread < threads)
+	{
+		uint32_t nounce = startNounce + thread;
+		uint64_t W[80];
+		uint64_t r[8];
+#pragma unroll 8
+		for (int i = 0; i < 8; i ++) {r[i] = H_512[i];}
+
+		// First block: c_PaddedMessage80 with the nonce inserted into word 14 via REPLACE_HIWORD.
+#pragma unroll 14
+		for (int i = 0; i < 14; i ++) {W[i] = cuda_swab64(c_PaddedMessage80[i]);}
+		W[14] = cuda_swab64(REPLACE_HIWORD(c_PaddedMessage80[14],nounce));
+		W[15] = cuda_swab64(c_PaddedMessage80[15]);
+
+#pragma unroll 64
+		for (int i = 16; i < 80; i ++)
+			W[i] = sph_t64(ssg5_1(W[i - 2]) + W[i - 7] + ssg5_0(W[i - 15]) + W[i - 16]);
+
+#if __CUDA_ARCH__ < 500    // go figure...
+#pragma unroll 10
+#endif
+		for (int i = 0; i < 10; i ++) {
+#pragma unroll 8
+			for (int ord=0;ord<8;ord++) {sha3_step2(r,W,K,ord,8*i+ord); }
+		}
+
+		// State after the first block; a copy is kept because it is added back after the second block.
+		uint64_t tempr[8];
+#pragma unroll 8
+		for (int i = 0; i < 8; i++) {tempr[i] = r[i] = sph_t64(r[i] + H_512[i]);}
+
+		// Second block: its W[i]+K[i] values do not depend on the nonce and come from WK.
+		// The unroll count used to trail the "#endif" as stray tokens; it now sits on the pragma.
+#if __CUDA_ARCH__ < 500    // go figure...
+#pragma unroll 10
+#endif
+		for (int i = 0; i < 10; i ++) {
+#pragma unroll 8
+			for (int ord=0;ord<8;ord++) {sha3_step3(r,WK,ord,8*i+ord); }
+		}
+#pragma unroll 8
+		for(int i=0;i<8;i++) {outputHash[i*threads+thread] = cuda_swab64(sph_t64(r[i] + tempr[i]));}
+	} /// thread
+}
+// Uploads K512 and H512 to the K_512 / H_512 device symbols and precomputes gpu_WK, the W[i]+K512[i] table
+// of a message block whose words 0..14 are zero and whose word 15 is 0x3d0. thr_id and threads are not used.
+void sha512_cpu_init(int thr_id, int threads)
+{
+#define ROTR64(x, n)        (((x) >> (n)) | ((x) << (64 - (n))))
+#define SPH_T64(x)           ((x) & 0xFFFFFFFFFFFFFFFF)
+#define BSG5_0(x)      (ROTR64(x, 28) ^ ROTR64(x, 34) ^ ROTR64(x, 39))
+#define BSG5_1(x)      (ROTR64(x, 14) ^ ROTR64(x, 18) ^ ROTR64(x, 41))
+#define SSG5_0(x)      (ROTR64(x, 1) ^ ROTR64(x, 8) ^ SPH_T64((x) >> 7))
+#define SSG5_1(x)      (ROTR64(x, 19) ^ ROTR64(x, 61) ^ SPH_T64((x) >> 6))
+    cudaMemcpyToSymbol(K_512,K512,80*sizeof(uint64_t),0, cudaMemcpyHostToDevice);
+	cudaMemcpyToSymbol(H_512,H512,sizeof(H512),0, cudaMemcpyHostToDevice);
+	uint64_t W[80],WK[80];
+// NOTE: the macros above are defined inside the function body but stay defined for the rest of the file.
+		for (int i = 0; i < 15; i ++) {W[i] = 0;}
+		      W[15]=0x3d0; // 0x3d0 = 976 = 122 * 8
+		for (int i = 16; i < 80; i ++) {
+			W[i] = SPH_T64(SSG5_1(W[i - 2]) + W[i - 7] + SSG5_0(W[i - 15]) + W[i - 16]);} 	   
+		for (int i=0; i<80;i++) {WK[i]=W[i]+K512[i];} // consumed by sha3_step3 in the m7 kernels
+	 cudaMemcpyToSymbol(gpu_WK,WK,80*sizeof(uint64_t),0, cudaMemcpyHostToDevice);
+}
+
+// Host launcher for sha512_gpu_hash_64: one thread per hash, 256 threads per block, default stream,
+// followed by MyStreamSynchronize(NULL, order, thr_id). d_hash is handed to the kernel as uint64_t words.
+__host__ void sha512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order)
+{
+	// Original note on the block size: "alignment with mixtab size - do not change".
+	const int kBlockThreads = 256;
+	// Number of thread blocks needed, rounded up so a partial last block is covered.
+	const int blockCount = (threads + kBlockThreads - 1) / kBlockThreads;
+	uint64_t *hashWords = (uint64_t*)d_hash;
+
+	dim3 gridDims(blockCount);
+	dim3 blockDims(kBlockThreads);
+	sha512_gpu_hash_64<<<gridDims, blockDims, 0>>>(threads, startNounce, hashWords, d_nonceVector);
+	MyStreamSynchronize(NULL, order, thr_id);
+}
+
+// Builds the first 128-byte block for the 122-byte input at pdata (0x80 marker at byte 122, zero
+// bytes after it) and uploads all 128 bytes to the c_PaddedMessage80 device symbol.
+__host__ void sha512_setBlock_120(void *pdata)
+{
+	const size_t kMessageBytes = 122;
+	unsigned char block[128];
+	memcpy(block, pdata, kMessageBytes);
+	block[kMessageBytes] = 0x80;
+	for (size_t pos = kMessageBytes + 1; pos < sizeof(block); ++pos) block[pos] = 0;
+	cudaMemcpyToSymbol(c_PaddedMessage80, block, sizeof(block), 0, cudaMemcpyHostToDevice);
+}
+
+// Launches the 122-byte SHA-512 kernel on the default stream for `threads` nonces starting at startNounce
+// (gpu50 variant when device_major[thr_id] == 5), then waits via MyStreamSynchronize(NULL, order, thr_id).
+__host__ void m7_sha512_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order)
+{
+	const int threadsperblock = 256; // original note: "alignment with mixtab size - do not change"
+	// Round the block count up: plain threads/threadsperblock dropped the trailing partial block and
+	// produced an empty grid for threads < 256. Both kernels ignore threads whose index is >= threads.
+	dim3 grid((threads + threadsperblock - 1) / threadsperblock);
+	dim3 block(threadsperblock);
+	if (device_major[thr_id]==5) {
+		m7_sha512_gpu50_hash_120<<<grid, block, 0>>>(threads, startNounce, d_outputHash);
+	} else {
+		m7_sha512_gpu_hash_120<<<grid, block, 0>>>(threads, startNounce, d_outputHash);
+	}
+	MyStreamSynchronize(NULL, order, thr_id);
+}
+ 
diff --git a/x13/cuda_shabal512.cu b/x13/cuda_shabal512.cu
new file mode 100644
index 0000000000..87527f5b37
--- /dev/null
+++ b/x13/cuda_shabal512.cu
@@ -0,0 +1,415 @@
+/*
+ * Quick and dirty addition of Shabal-512 for X15
+ * 
+ * Built on cbuchner1's implementation, actual hashing code
+ * heavily based on phm's sgminer
+ *
+ */
+
+/*
+ * Shabal-512 kernel implementation.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2014  phm
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author   phm <phm@inbox.com>
+ */
+#include <cuda.h>
+#include "cuda_runtime.h"
+#include "device_launch_parameters.h"
+
+#include <stdio.h>
+#include <stdint.h>
+#include <memory.h>
+
+
+#define USE_SHARED 1
+
+#define SPH_C64(x)    ((uint64_t)(x ## ULL))
+#define SPH_C32(x)    ((uint32_t)(x ## U))
+#define SPH_T32(x)    ((x) & SPH_C32(0xFFFFFFFF))
+
+#define sM    16
+
+#define C32   SPH_C32
+#define T32   SPH_T32
+
+#define O1   13
+#define O2    9
+#define O3    6
+
+
+#if __CUDA_ARCH__ < 350 
+    // __CUDA_ARCH__ below 350 (or undefined, which the preprocessor evaluates as 0): rotate with two shifts
+    #define SPH_ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n))))
+#else
+    // __CUDA_ARCH__ 350 and above: rotate with the funnel-shift intrinsic
+    #define SPH_ROTL32(x, n) __funnelshift_l( (x), (x), (n) )
+#endif
+
+
+// from heavy.cu
+extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
+// Initial A/B/C state tables: host copies (*_init_512) and their device symbols, filled by x13_shabal512_cpu_init.
+__constant__ uint32_t C_512[16]; // device copy of C_init_512
+static const uint32_t C_init_512[] = {
+	C32(0xD9BF68D1), C32(0x58BAD750), C32(0x56028CB2), C32(0x8134F359),
+	C32(0xB5D469D8), C32(0x941A8CC2), C32(0x418B2A6E), C32(0x04052780),
+	C32(0x7F07D787), C32(0x5194358F), C32(0x3C60D665), C32(0xBE97D79A),
+	C32(0x950C3434), C32(0xAED9A06D), C32(0x2537DC8D), C32(0x7CDB5969)
+};
+__constant__ uint32_t A_512[16]; // device copy of A_init_512; only entries 0..11 are written and read
+static const uint32_t A_init_512[] = {
+	C32(0x20728DFD), C32(0x46C0BD53), C32(0xE782B699), C32(0x55304632),
+	C32(0x71B4EF90), C32(0x0EA9E82C), C32(0xDBB930F1), C32(0xFAD06B8B),
+	C32(0xBE0CAE40), C32(0x8BD14410), C32(0x76D2ADAC), C32(0x28ACAB7F)
+};
+__constant__ uint32_t B_512[16]; // device copy of B_init_512
+static const uint32_t B_init_512[] = {
+	C32(0xC1099CB7), C32(0x07B385F3), C32(0xE7442C26), C32(0xCC8AD640),
+	C32(0xEB6F56C7), C32(0x1EA81AA9), C32(0x73B9D314), C32(0x1DE85D08),
+	C32(0x48910A5A), C32(0x893B22DB), C32(0xC5A0DF44), C32(0xBBC4324E),
+	C32(0x72D2F240), C32(0x75941D99), C32(0x6D8BDE82), C32(0xA1A7502B)
+};
+// Adds message word Mi to state word Bi for all 16 words; expects B0..BF and M0..MF in the enclosing scope.
+#define INPUT_BLOCK_ADD    { \
+		B0 = T32(B0 + M0); \
+		B1 = T32(B1 + M1); \
+		B2 = T32(B2 + M2); \
+		B3 = T32(B3 + M3); \
+		B4 = T32(B4 + M4); \
+		B5 = T32(B5 + M5); \
+		B6 = T32(B6 + M6); \
+		B7 = T32(B7 + M7); \
+		B8 = T32(B8 + M8); \
+		B9 = T32(B9 + M9); \
+		BA = T32(BA + MA); \
+		BB = T32(BB + MB); \
+		BC = T32(BC + MC); \
+		BD = T32(BD + MD); \
+		BE = T32(BE + ME); \
+		BF = T32(BF + MF); \
+	} 
+// Subtracts message word Mi from state word Ci for all 16 words; expects C0..CF and M0..MF in the enclosing scope.
+#define INPUT_BLOCK_SUB    { \
+		C0 = T32(C0 - M0); \
+		C1 = T32(C1 - M1); \
+		C2 = T32(C2 - M2); \
+		C3 = T32(C3 - M3); \
+		C4 = T32(C4 - M4); \
+		C5 = T32(C5 - M5); \
+		C6 = T32(C6 - M6); \
+		C7 = T32(C7 - M7); \
+		C8 = T32(C8 - M8); \
+		C9 = T32(C9 - M9); \
+		CA = T32(CA - MA); \
+		CB = T32(CB - MB); \
+		CC = T32(CC - MC); \
+		CD = T32(CD - MD); \
+		CE = T32(CE - ME); \
+		CF = T32(CF - MF); \
+	} 
+// XORs the counter words Wlow / Whigh into A00 / A01.
+#define XOR_W    { \
+		A00 ^= Wlow; \
+		A01 ^= Whigh; \
+	} 
+// Exchanges two uint32_t lvalues through a block-local temporary named tmp.
+#define SWAP(v1, v2)    { \
+		uint32_t tmp = (v1); \
+		(v1) = (v2); \
+		(v2) = tmp; \
+	} 
+// Swaps Bi with Ci for all 16 state words.
+#define SWAP_BC    { \
+		SWAP(B0, C0); \
+		SWAP(B1, C1); \
+		SWAP(B2, C2); \
+		SWAP(B3, C3); \
+		SWAP(B4, C4); \
+		SWAP(B5, C5); \
+		SWAP(B6, C6); \
+		SWAP(B7, C7); \
+		SWAP(B8, C8); \
+		SWAP(B9, C9); \
+		SWAP(BA, CA); \
+		SWAP(BB, CB); \
+		SWAP(BC, CC); \
+		SWAP(BD, CD); \
+		SWAP(BE, CE); \
+		SWAP(BF, CF); \
+	} 
+// One permutation element: xa0 is rebuilt from xa0, xa1 rotated left by 15 (times 5), xc, xb1..xb3 and xm; xb0 is then rebuilt from its rotate-left-by-1 and the new xa0.
+#define PERM_ELT(xa0, xa1, xb0, xb1, xb2, xb3, xc, xm)    { \
+		xa0 = T32((xa0 \
+			^ (((xa1 << 15) | (xa1 >> 17)) * 5U) \
+			^ xc) * 3U) \
+			^ xb1 ^ (xb2 & ~xb3) ^ xm; \
+		xb0 = T32(~(((xb0 << 1) | (xb0 >> 31)) ^ xa0)); \
+	} 
+// 16 PERM_ELT applications, one per B/M word; the A operand starts at A00 and cycles through the 12 A words.
+#define PERM_STEP_0    { \
+		PERM_ELT(A00, A0B, B0, BD, B9, B6, C8, M0); \
+		PERM_ELT(A01, A00, B1, BE, BA, B7, C7, M1); \
+		PERM_ELT(A02, A01, B2, BF, BB, B8, C6, M2); \
+		PERM_ELT(A03, A02, B3, B0, BC, B9, C5, M3); \
+		PERM_ELT(A04, A03, B4, B1, BD, BA, C4, M4); \
+		PERM_ELT(A05, A04, B5, B2, BE, BB, C3, M5); \
+		PERM_ELT(A06, A05, B6, B3, BF, BC, C2, M6); \
+		PERM_ELT(A07, A06, B7, B4, B0, BD, C1, M7); \
+		PERM_ELT(A08, A07, B8, B5, B1, BE, C0, M8); \
+		PERM_ELT(A09, A08, B9, B6, B2, BF, CF, M9); \
+		PERM_ELT(A0A, A09, BA, B7, B3, B0, CE, MA); \
+		PERM_ELT(A0B, A0A, BB, B8, B4, B1, CD, MB); \
+		PERM_ELT(A00, A0B, BC, B9, B5, B2, CC, MC); \
+		PERM_ELT(A01, A00, BD, BA, B6, B3, CB, MD); \
+		PERM_ELT(A02, A01, BE, BB, B7, B4, CA, ME); \
+		PERM_ELT(A03, A02, BF, BC, B8, B5, C9, MF); \
+	} 
+// 16 PERM_ELT applications, one per B/M word; the A operand starts at A04 and cycles through the 12 A words.
+#define PERM_STEP_1   { \
+		PERM_ELT(A04, A03, B0, BD, B9, B6, C8, M0); \
+		PERM_ELT(A05, A04, B1, BE, BA, B7, C7, M1); \
+		PERM_ELT(A06, A05, B2, BF, BB, B8, C6, M2); \
+		PERM_ELT(A07, A06, B3, B0, BC, B9, C5, M3); \
+		PERM_ELT(A08, A07, B4, B1, BD, BA, C4, M4); \
+		PERM_ELT(A09, A08, B5, B2, BE, BB, C3, M5); \
+		PERM_ELT(A0A, A09, B6, B3, BF, BC, C2, M6); \
+		PERM_ELT(A0B, A0A, B7, B4, B0, BD, C1, M7); \
+		PERM_ELT(A00, A0B, B8, B5, B1, BE, C0, M8); \
+		PERM_ELT(A01, A00, B9, B6, B2, BF, CF, M9); \
+		PERM_ELT(A02, A01, BA, B7, B3, B0, CE, MA); \
+		PERM_ELT(A03, A02, BB, B8, B4, B1, CD, MB); \
+		PERM_ELT(A04, A03, BC, B9, B5, B2, CC, MC); \
+		PERM_ELT(A05, A04, BD, BA, B6, B3, CB, MD); \
+		PERM_ELT(A06, A05, BE, BB, B7, B4, CA, ME); \
+		PERM_ELT(A07, A06, BF, BC, B8, B5, C9, MF); \
+	} 
+// 16 PERM_ELT applications, one per B/M word; the A operand starts at A08 and cycles through the 12 A words.
+#define PERM_STEP_2   { \
+		PERM_ELT(A08, A07, B0, BD, B9, B6, C8, M0); \
+		PERM_ELT(A09, A08, B1, BE, BA, B7, C7, M1); \
+		PERM_ELT(A0A, A09, B2, BF, BB, B8, C6, M2); \
+		PERM_ELT(A0B, A0A, B3, B0, BC, B9, C5, M3); \
+		PERM_ELT(A00, A0B, B4, B1, BD, BA, C4, M4); \
+		PERM_ELT(A01, A00, B5, B2, BE, BB, C3, M5); \
+		PERM_ELT(A02, A01, B6, B3, BF, BC, C2, M6); \
+		PERM_ELT(A03, A02, B7, B4, B0, BD, C1, M7); \
+		PERM_ELT(A04, A03, B8, B5, B1, BE, C0, M8); \
+		PERM_ELT(A05, A04, B9, B6, B2, BF, CF, M9); \
+		PERM_ELT(A06, A05, BA, B7, B3, B0, CE, MA); \
+		PERM_ELT(A07, A06, BB, B8, B4, B1, CD, MB); \
+		PERM_ELT(A08, A07, BC, B9, B5, B2, CC, MC); \
+		PERM_ELT(A09, A08, BD, BA, B6, B3, CB, MD); \
+		PERM_ELT(A0A, A09, BE, BB, B7, B4, CA, ME); \
+		PERM_ELT(A0B, A0A, BF, BC, B8, B5, C9, MF); \
+	} 
+// Full permutation: rotates every B word left by 17, runs PERM_STEP_0..2, then adds C words into the A words (36 additions, three per A word).
+#define APPLY_P    { \
+		B0 = T32(B0 << 17) | (B0 >> 15); \
+		B1 = T32(B1 << 17) | (B1 >> 15); \
+		B2 = T32(B2 << 17) | (B2 >> 15); \
+		B3 = T32(B3 << 17) | (B3 >> 15); \
+		B4 = T32(B4 << 17) | (B4 >> 15); \
+		B5 = T32(B5 << 17) | (B5 >> 15); \
+		B6 = T32(B6 << 17) | (B6 >> 15); \
+		B7 = T32(B7 << 17) | (B7 >> 15); \
+		B8 = T32(B8 << 17) | (B8 >> 15); \
+		B9 = T32(B9 << 17) | (B9 >> 15); \
+		BA = T32(BA << 17) | (BA >> 15); \
+		BB = T32(BB << 17) | (BB >> 15); \
+		BC = T32(BC << 17) | (BC >> 15); \
+		BD = T32(BD << 17) | (BD >> 15); \
+		BE = T32(BE << 17) | (BE >> 15); \
+		BF = T32(BF << 17) | (BF >> 15); \
+		PERM_STEP_0; \
+		PERM_STEP_1; \
+		PERM_STEP_2; \
+		A0B = T32(A0B + C6); \
+		A0A = T32(A0A + C5); \
+		A09 = T32(A09 + C4); \
+		A08 = T32(A08 + C3); \
+		A07 = T32(A07 + C2); \
+		A06 = T32(A06 + C1); \
+		A05 = T32(A05 + C0); \
+		A04 = T32(A04 + CF); \
+		A03 = T32(A03 + CE); \
+		A02 = T32(A02 + CD); \
+		A01 = T32(A01 + CC); \
+		A00 = T32(A00 + CB); \
+		A0B = T32(A0B + CA); \
+		A0A = T32(A0A + C9); \
+		A09 = T32(A09 + C8); \
+		A08 = T32(A08 + C7); \
+		A07 = T32(A07 + C6); \
+		A06 = T32(A06 + C5); \
+		A05 = T32(A05 + C4); \
+		A04 = T32(A04 + C3); \
+		A03 = T32(A03 + C2); \
+		A02 = T32(A02 + C1); \
+		A01 = T32(A01 + C0); \
+		A00 = T32(A00 + CF); \
+		A0B = T32(A0B + CE); \
+		A0A = T32(A0A + CD); \
+		A09 = T32(A09 + CC); \
+		A08 = T32(A08 + CB); \
+		A07 = T32(A07 + CA); \
+		A06 = T32(A06 + C9); \
+		A05 = T32(A05 + C8); \
+		A04 = T32(A04 + C7); \
+		A03 = T32(A03 + C6); \
+		A02 = T32(A02 + C5); \
+		A01 = T32(A01 + C4); \
+		A00 = T32(A00 + C3); \
+	} 
+// Increments the counter held in Wlow / Whigh: Whigh is bumped when Wlow wraps around to 0.
+#define INCR_W   { \
+		if ((Wlow = T32(Wlow + 1)) == 0) \
+			Whigh = T32(Whigh + 1); \
+	} 
+
+// Shabal-512 over one 64-byte hash per thread, in place. The thread's 16 uint32 input words at
+// g_hash[8*hashPosition] form the first message block; a second block holding only the word 0x80
+// follows. After the final permutation passes the 16 B words overwrite the input words.
+// g_nonceVector may be NULL, in which case thread t handles nonce startNounce + t.
+__global__ void x13_shabal512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector)
+{
+    // Threads at or beyond `threads` have nothing to do.
+    const int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+    if (thread >= threads)
+        return;
+
+    // Locate this thread's 64-byte slot (16 uint32 words) in the hash buffer.
+    const uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread);
+    const int hashPosition = nounce - startNounce;
+    uint32_t *inpHash = (uint32_t*)&g_hash[8 * hashPosition];
+
+    // State words A (12), B (16) and C (16) start from the constant tables. The identifiers below
+    // (A00.., B0.., C0.., M0.., Wlow, Whigh) are the names the macros in this file expand to.
+    uint32_t A00 = A_512[0], A01 = A_512[1], A02 = A_512[2], A03 = A_512[3];
+    uint32_t A04 = A_512[4], A05 = A_512[5], A06 = A_512[6], A07 = A_512[7];
+    uint32_t A08 = A_512[8], A09 = A_512[9], A0A = A_512[10], A0B = A_512[11];
+
+    uint32_t B0 = B_512[0], B1 = B_512[1], B2 = B_512[2], B3 = B_512[3];
+    uint32_t B4 = B_512[4], B5 = B_512[5], B6 = B_512[6], B7 = B_512[7];
+    uint32_t B8 = B_512[8], B9 = B_512[9], BA = B_512[10], BB = B_512[11];
+    uint32_t BC = B_512[12], BD = B_512[13], BE = B_512[14], BF = B_512[15];
+
+    uint32_t C0 = C_512[0], C1 = C_512[1], C2 = C_512[2], C3 = C_512[3];
+    uint32_t C4 = C_512[4], C5 = C_512[5], C6 = C_512[6], C7 = C_512[7];
+    uint32_t C8 = C_512[8], C9 = C_512[9], CA = C_512[10], CB = C_512[11];
+    uint32_t CC = C_512[12], CD = C_512[13], CE = C_512[14], CF = C_512[15];
+
+    // Counter words used by XOR_W and advanced by INCR_W.
+    uint32_t Wlow = 1, Whigh = 0;
+
+    // First message block: the 16 input words, read straight from the hash buffer.
+    uint32_t M0 = inpHash[0];
+    uint32_t M1 = inpHash[1];
+    uint32_t M2 = inpHash[2];
+    uint32_t M3 = inpHash[3];
+    uint32_t M4 = inpHash[4];
+    uint32_t M5 = inpHash[5];
+    uint32_t M6 = inpHash[6];
+    uint32_t M7 = inpHash[7];
+    uint32_t M8 = inpHash[8];
+    uint32_t M9 = inpHash[9];
+    uint32_t MA = inpHash[10];
+    uint32_t MB = inpHash[11];
+    uint32_t MC = inpHash[12];
+    uint32_t MD = inpHash[13];
+    uint32_t ME = inpHash[14];
+    uint32_t MF = inpHash[15];
+
+    // Process the first block and advance the counter.
+    INPUT_BLOCK_ADD;
+    XOR_W;
+    APPLY_P;
+    INPUT_BLOCK_SUB;
+    SWAP_BC;
+    INCR_W;
+
+    // Second message block: the word 0x80 followed by 15 zero words.
+    // It is added and permuted once here, then permuted three more times below.
+    M0 = 0x80;
+    M1 = M2 = M3 = M4 = M5 = M6 = M7 = M8 = M9 = MA = MB = MC = MD = ME = MF = 0;
+
+    INPUT_BLOCK_ADD;
+    XOR_W;
+    APPLY_P;
+
+    // Three further passes over the same block, without advancing the counter.
+#pragma unroll 3
+    for (unsigned pass = 0; pass < 3; ++pass) {
+        SWAP_BC;
+        XOR_W;
+        APPLY_P;
+    }
+
+    // The result is the 16 B words; store them over the input.
+    inpHash[0] = B0;
+    inpHash[1] = B1;
+    inpHash[2] = B2;
+    inpHash[3] = B3;
+    inpHash[4] = B4;
+    inpHash[5] = B5;
+    inpHash[6] = B6;
+    inpHash[7] = B7;
+    inpHash[8] = B8;
+    inpHash[9] = B9;
+    inpHash[10] = BA;
+    inpHash[11] = BB;
+    inpHash[12] = BC;
+    inpHash[13] = BD;
+    inpHash[14] = BE;
+    inpHash[15] = BF;
+}
+
+// Uploads the host-side initial A/B/C tables into the A_512, B_512 and C_512 device symbols.
+// Each copy is sized by its host table (A_init_512 has 12 words, B and C have 16). Arguments are unused.
+void x13_shabal512_cpu_init(int thr_id, int threads)
+{
+	cudaMemcpyToSymbol(A_512, A_init_512, sizeof(A_init_512), 0, cudaMemcpyHostToDevice);
+	cudaMemcpyToSymbol(B_512, B_init_512, sizeof(B_init_512), 0, cudaMemcpyHostToDevice);
+	cudaMemcpyToSymbol(C_512, C_init_512, sizeof(C_init_512), 0, cudaMemcpyHostToDevice);
+}
+
+// Host launcher for x13_shabal512_gpu_hash_64: one thread per hash, 256 threads per block, default
+// stream, followed by MyStreamSynchronize(NULL, order, thr_id). d_hash is passed as uint64_t words.
+__host__ void x13_shabal512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order)
+{
+	// Original note on the block size: "alignment with mixtab size - do not change".
+	const int kBlockThreads = 256;
+	// Number of thread blocks needed, rounded up so a partial last block is covered.
+	const int blockCount = (threads + kBlockThreads - 1) / kBlockThreads;
+	uint64_t *hashWords = (uint64_t*)d_hash;
+
+	dim3 gridDims(blockCount);
+	dim3 blockDims(kBlockThreads);
+
+	x13_shabal512_gpu_hash_64<<<gridDims, blockDims, 0>>>(threads, startNounce, hashWords, d_nonceVector);
+	MyStreamSynchronize(NULL, order, thr_id);
+}
diff --git a/x13/cuda_tiger192.cu b/x13/cuda_tiger192.cu
new file mode 100644
index 0000000000..e1db9fac3a
--- /dev/null
+++ b/x13/cuda_tiger192.cu
@@ -0,0 +1,805 @@
+/*
+ * tiger-192 djm34
+ * 
+ */
+
+/*
+ * tiger-192 kernel implementation.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2014  djm34
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author   phm <phm@inbox.com>
+ */
+#include <cuda.h>
+#include "cuda_runtime.h"
+#include "device_launch_parameters.h"
+
+
+#include <stdio.h>
+#include <stdint.h>
+#include <memory.h>
+
+
+
+#include "cuda_helper.h"
+
+#define SPH_C64(x)    ((uint64_t)(x ## ULL)) /* pastes the ULL suffix onto literal x and casts to uint64_t */
+#define SPH_C32(x)    ((uint32_t)(x ## U)) /* pastes the U suffix onto literal x and casts to uint32_t */
+#define SPH_T32(x)    ((x) & SPH_C32(0xFFFFFFFF)) /* masks x with 0xFFFFFFFF */
+#define ROTL    SPH_ROTL32 /* alias; SPH_ROTL32 is not defined in the visible part of this file -- TODO confirm where it comes from */
+//#define SPH_T64(x)  ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF))
+#define SPH_T64(x)  (x) /* identity: the masking variant above is commented out */
+// from heavy.cu
+extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
+
+
+ __constant__ uint64_t c_PaddedMessage80[16]; // padded message (80 bytes + padding); declared without `static`, unlike the tables below
+ __constant__ uint64_t bufo[3]; // also non-static; NOTE(review): its role is not visible in this part of the file
+static __constant__ uint64_t gpu_III[3]; // presumably the device-side copy of III below -- TODO confirm against the init code
+static __constant__ uint64_t T1[256]; // T1..T4: presumably the device-side copies of cpu_T1..cpu_T4 -- TODO confirm
+static __constant__ uint64_t T2[256];
+static __constant__ uint64_t T3[256];
+static __constant__ uint64_t T4[256];
+static const uint64_t III[3] = { // host-side triple of 64-bit constants; presumably the Tiger initial state -- TODO confirm
+	SPH_C64(0x0123456789ABCDEF),SPH_C64(0xFEDCBA9876543210),SPH_C64(0xF096A5B4C3B2E187)
+};
+
+static const uint64_t cpu_T1[256] = { // host-side table of 256 64-bit constants; presumably uploaded to __constant__ T1 (Tiger S-box 1) -- TODO confirm
+	SPH_C64(0x02AAB17CF7E90C5E), SPH_C64(0xAC424B03E243A8EC),
+	SPH_C64(0x72CD5BE30DD5FCD3), SPH_C64(0x6D019B93F6F97F3A),
+	SPH_C64(0xCD9978FFD21F9193), SPH_C64(0x7573A1C9708029E2),
+	SPH_C64(0xB164326B922A83C3), SPH_C64(0x46883EEE04915870),
+	SPH_C64(0xEAACE3057103ECE6), SPH_C64(0xC54169B808A3535C),
+	SPH_C64(0x4CE754918DDEC47C), SPH_C64(0x0AA2F4DFDC0DF40C),
+	SPH_C64(0x10B76F18A74DBEFA), SPH_C64(0xC6CCB6235AD1AB6A),
+	SPH_C64(0x13726121572FE2FF), SPH_C64(0x1A488C6F199D921E),
+	SPH_C64(0x4BC9F9F4DA0007CA), SPH_C64(0x26F5E6F6E85241C7),
+	SPH_C64(0x859079DBEA5947B6), SPH_C64(0x4F1885C5C99E8C92),
+	SPH_C64(0xD78E761EA96F864B), SPH_C64(0x8E36428C52B5C17D),
+	SPH_C64(0x69CF6827373063C1), SPH_C64(0xB607C93D9BB4C56E),
+	SPH_C64(0x7D820E760E76B5EA), SPH_C64(0x645C9CC6F07FDC42),
+	SPH_C64(0xBF38A078243342E0), SPH_C64(0x5F6B343C9D2E7D04),
+	SPH_C64(0xF2C28AEB600B0EC6), SPH_C64(0x6C0ED85F7254BCAC),
+	SPH_C64(0x71592281A4DB4FE5), SPH_C64(0x1967FA69CE0FED9F),
+	SPH_C64(0xFD5293F8B96545DB), SPH_C64(0xC879E9D7F2A7600B),
+	SPH_C64(0x860248920193194E), SPH_C64(0xA4F9533B2D9CC0B3),
+	SPH_C64(0x9053836C15957613), SPH_C64(0xDB6DCF8AFC357BF1),
+	SPH_C64(0x18BEEA7A7A370F57), SPH_C64(0x037117CA50B99066),
+	SPH_C64(0x6AB30A9774424A35), SPH_C64(0xF4E92F02E325249B),
+	SPH_C64(0x7739DB07061CCAE1), SPH_C64(0xD8F3B49CECA42A05),
+	SPH_C64(0xBD56BE3F51382F73), SPH_C64(0x45FAED5843B0BB28),
+	SPH_C64(0x1C813D5C11BF1F83), SPH_C64(0x8AF0E4B6D75FA169),
+	SPH_C64(0x33EE18A487AD9999), SPH_C64(0x3C26E8EAB1C94410),
+	SPH_C64(0xB510102BC0A822F9), SPH_C64(0x141EEF310CE6123B),
+	SPH_C64(0xFC65B90059DDB154), SPH_C64(0xE0158640C5E0E607),
+	SPH_C64(0x884E079826C3A3CF), SPH_C64(0x930D0D9523C535FD),
+	SPH_C64(0x35638D754E9A2B00), SPH_C64(0x4085FCCF40469DD5),
+	SPH_C64(0xC4B17AD28BE23A4C), SPH_C64(0xCAB2F0FC6A3E6A2E),
+	SPH_C64(0x2860971A6B943FCD), SPH_C64(0x3DDE6EE212E30446),
+	SPH_C64(0x6222F32AE01765AE), SPH_C64(0x5D550BB5478308FE),
+	SPH_C64(0xA9EFA98DA0EDA22A), SPH_C64(0xC351A71686C40DA7),
+	SPH_C64(0x1105586D9C867C84), SPH_C64(0xDCFFEE85FDA22853),
+	SPH_C64(0xCCFBD0262C5EEF76), SPH_C64(0xBAF294CB8990D201),
+	SPH_C64(0xE69464F52AFAD975), SPH_C64(0x94B013AFDF133E14),
+	SPH_C64(0x06A7D1A32823C958), SPH_C64(0x6F95FE5130F61119),
+	SPH_C64(0xD92AB34E462C06C0), SPH_C64(0xED7BDE33887C71D2),
+	SPH_C64(0x79746D6E6518393E), SPH_C64(0x5BA419385D713329),
+	SPH_C64(0x7C1BA6B948A97564), SPH_C64(0x31987C197BFDAC67),
+	SPH_C64(0xDE6C23C44B053D02), SPH_C64(0x581C49FED002D64D),
+	SPH_C64(0xDD474D6338261571), SPH_C64(0xAA4546C3E473D062),
+	SPH_C64(0x928FCE349455F860), SPH_C64(0x48161BBACAAB94D9),
+	SPH_C64(0x63912430770E6F68), SPH_C64(0x6EC8A5E602C6641C),
+	SPH_C64(0x87282515337DDD2B), SPH_C64(0x2CDA6B42034B701B),
+	SPH_C64(0xB03D37C181CB096D), SPH_C64(0xE108438266C71C6F),
+	SPH_C64(0x2B3180C7EB51B255), SPH_C64(0xDF92B82F96C08BBC),
+	SPH_C64(0x5C68C8C0A632F3BA), SPH_C64(0x5504CC861C3D0556),
+	SPH_C64(0xABBFA4E55FB26B8F), SPH_C64(0x41848B0AB3BACEB4),
+	SPH_C64(0xB334A273AA445D32), SPH_C64(0xBCA696F0A85AD881),
+	SPH_C64(0x24F6EC65B528D56C), SPH_C64(0x0CE1512E90F4524A),
+	SPH_C64(0x4E9DD79D5506D35A), SPH_C64(0x258905FAC6CE9779),
+	SPH_C64(0x2019295B3E109B33), SPH_C64(0xF8A9478B73A054CC),
+	SPH_C64(0x2924F2F934417EB0), SPH_C64(0x3993357D536D1BC4),
+	SPH_C64(0x38A81AC21DB6FF8B), SPH_C64(0x47C4FBF17D6016BF),
+	SPH_C64(0x1E0FAADD7667E3F5), SPH_C64(0x7ABCFF62938BEB96),
+	SPH_C64(0xA78DAD948FC179C9), SPH_C64(0x8F1F98B72911E50D),
+	SPH_C64(0x61E48EAE27121A91), SPH_C64(0x4D62F7AD31859808),
+	SPH_C64(0xECEBA345EF5CEAEB), SPH_C64(0xF5CEB25EBC9684CE),
+	SPH_C64(0xF633E20CB7F76221), SPH_C64(0xA32CDF06AB8293E4),
+	SPH_C64(0x985A202CA5EE2CA4), SPH_C64(0xCF0B8447CC8A8FB1),
+	SPH_C64(0x9F765244979859A3), SPH_C64(0xA8D516B1A1240017),
+	SPH_C64(0x0BD7BA3EBB5DC726), SPH_C64(0xE54BCA55B86ADB39),
+	SPH_C64(0x1D7A3AFD6C478063), SPH_C64(0x519EC608E7669EDD),
+	SPH_C64(0x0E5715A2D149AA23), SPH_C64(0x177D4571848FF194),
+	SPH_C64(0xEEB55F3241014C22), SPH_C64(0x0F5E5CA13A6E2EC2),
+	SPH_C64(0x8029927B75F5C361), SPH_C64(0xAD139FABC3D6E436),
+	SPH_C64(0x0D5DF1A94CCF402F), SPH_C64(0x3E8BD948BEA5DFC8),
+	SPH_C64(0xA5A0D357BD3FF77E), SPH_C64(0xA2D12E251F74F645),
+	SPH_C64(0x66FD9E525E81A082), SPH_C64(0x2E0C90CE7F687A49),
+	SPH_C64(0xC2E8BCBEBA973BC5), SPH_C64(0x000001BCE509745F),
+	SPH_C64(0x423777BBE6DAB3D6), SPH_C64(0xD1661C7EAEF06EB5),
+	SPH_C64(0xA1781F354DAACFD8), SPH_C64(0x2D11284A2B16AFFC),
+	SPH_C64(0xF1FC4F67FA891D1F), SPH_C64(0x73ECC25DCB920ADA),
+	SPH_C64(0xAE610C22C2A12651), SPH_C64(0x96E0A810D356B78A),
+	SPH_C64(0x5A9A381F2FE7870F), SPH_C64(0xD5AD62EDE94E5530),
+	SPH_C64(0xD225E5E8368D1427), SPH_C64(0x65977B70C7AF4631),
+	SPH_C64(0x99F889B2DE39D74F), SPH_C64(0x233F30BF54E1D143),
+	SPH_C64(0x9A9675D3D9A63C97), SPH_C64(0x5470554FF334F9A8),
+	SPH_C64(0x166ACB744A4F5688), SPH_C64(0x70C74CAAB2E4AEAD),
+	SPH_C64(0xF0D091646F294D12), SPH_C64(0x57B82A89684031D1),
+	SPH_C64(0xEFD95A5A61BE0B6B), SPH_C64(0x2FBD12E969F2F29A),
+	SPH_C64(0x9BD37013FEFF9FE8), SPH_C64(0x3F9B0404D6085A06),
+	SPH_C64(0x4940C1F3166CFE15), SPH_C64(0x09542C4DCDF3DEFB),
+	SPH_C64(0xB4C5218385CD5CE3), SPH_C64(0xC935B7DC4462A641),
+	SPH_C64(0x3417F8A68ED3B63F), SPH_C64(0xB80959295B215B40),
+	SPH_C64(0xF99CDAEF3B8C8572), SPH_C64(0x018C0614F8FCB95D),
+	SPH_C64(0x1B14ACCD1A3ACDF3), SPH_C64(0x84D471F200BB732D),
+	SPH_C64(0xC1A3110E95E8DA16), SPH_C64(0x430A7220BF1A82B8),
+	SPH_C64(0xB77E090D39DF210E), SPH_C64(0x5EF4BD9F3CD05E9D),
+	SPH_C64(0x9D4FF6DA7E57A444), SPH_C64(0xDA1D60E183D4A5F8),
+	SPH_C64(0xB287C38417998E47), SPH_C64(0xFE3EDC121BB31886),
+	SPH_C64(0xC7FE3CCC980CCBEF), SPH_C64(0xE46FB590189BFD03),
+	SPH_C64(0x3732FD469A4C57DC), SPH_C64(0x7EF700A07CF1AD65),
+	SPH_C64(0x59C64468A31D8859), SPH_C64(0x762FB0B4D45B61F6),
+	SPH_C64(0x155BAED099047718), SPH_C64(0x68755E4C3D50BAA6),
+	SPH_C64(0xE9214E7F22D8B4DF), SPH_C64(0x2ADDBF532EAC95F4),
+	SPH_C64(0x32AE3909B4BD0109), SPH_C64(0x834DF537B08E3450),
+	SPH_C64(0xFA209DA84220728D), SPH_C64(0x9E691D9B9EFE23F7),
+	SPH_C64(0x0446D288C4AE8D7F), SPH_C64(0x7B4CC524E169785B),
+	SPH_C64(0x21D87F0135CA1385), SPH_C64(0xCEBB400F137B8AA5),
+	SPH_C64(0x272E2B66580796BE), SPH_C64(0x3612264125C2B0DE),
+	SPH_C64(0x057702BDAD1EFBB2), SPH_C64(0xD4BABB8EACF84BE9),
+	SPH_C64(0x91583139641BC67B), SPH_C64(0x8BDC2DE08036E024),
+	SPH_C64(0x603C8156F49F68ED), SPH_C64(0xF7D236F7DBEF5111),
+	SPH_C64(0x9727C4598AD21E80), SPH_C64(0xA08A0896670A5FD7),
+	SPH_C64(0xCB4A8F4309EBA9CB), SPH_C64(0x81AF564B0F7036A1),
+	SPH_C64(0xC0B99AA778199ABD), SPH_C64(0x959F1EC83FC8E952),
+	SPH_C64(0x8C505077794A81B9), SPH_C64(0x3ACAAF8F056338F0),
+	SPH_C64(0x07B43F50627A6778), SPH_C64(0x4A44AB49F5ECCC77),
+	SPH_C64(0x3BC3D6E4B679EE98), SPH_C64(0x9CC0D4D1CF14108C),
+	SPH_C64(0x4406C00B206BC8A0), SPH_C64(0x82A18854C8D72D89),
+	SPH_C64(0x67E366B35C3C432C), SPH_C64(0xB923DD61102B37F2),
+	SPH_C64(0x56AB2779D884271D), SPH_C64(0xBE83E1B0FF1525AF),
+	SPH_C64(0xFB7C65D4217E49A9), SPH_C64(0x6BDBE0E76D48E7D4),
+	SPH_C64(0x08DF828745D9179E), SPH_C64(0x22EA6A9ADD53BD34),
+	SPH_C64(0xE36E141C5622200A), SPH_C64(0x7F805D1B8CB750EE),
+	SPH_C64(0xAFE5C7A59F58E837), SPH_C64(0xE27F996A4FB1C23C),
+	SPH_C64(0xD3867DFB0775F0D0), SPH_C64(0xD0E673DE6E88891A),
+	SPH_C64(0x123AEB9EAFB86C25), SPH_C64(0x30F1D5D5C145B895),
+	SPH_C64(0xBB434A2DEE7269E7), SPH_C64(0x78CB67ECF931FA38),
+	SPH_C64(0xF33B0372323BBF9C), SPH_C64(0x52D66336FB279C74),
+	SPH_C64(0x505F33AC0AFB4EAA), SPH_C64(0xE8A5CD99A2CCE187),
+	SPH_C64(0x534974801E2D30BB), SPH_C64(0x8D2D5711D5876D90),
+	SPH_C64(0x1F1A412891BC038E), SPH_C64(0xD6E2E71D82E56648),
+	SPH_C64(0x74036C3A497732B7), SPH_C64(0x89B67ED96361F5AB),
+	SPH_C64(0xFFED95D8F1EA02A2), SPH_C64(0xE72B3BD61464D43D),
+	SPH_C64(0xA6300F170BDC4820), SPH_C64(0xEBC18760ED78A77A)
+};
+
+static const uint64_t cpu_T2[256] = { // host-side table of 256 64-bit constants; presumably uploaded to __constant__ T2 (Tiger S-box 2) -- TODO confirm
+	SPH_C64(0xE6A6BE5A05A12138), SPH_C64(0xB5A122A5B4F87C98),
+	SPH_C64(0x563C6089140B6990), SPH_C64(0x4C46CB2E391F5DD5),
+	SPH_C64(0xD932ADDBC9B79434), SPH_C64(0x08EA70E42015AFF5),
+	SPH_C64(0xD765A6673E478CF1), SPH_C64(0xC4FB757EAB278D99),
+	SPH_C64(0xDF11C6862D6E0692), SPH_C64(0xDDEB84F10D7F3B16),
+	SPH_C64(0x6F2EF604A665EA04), SPH_C64(0x4A8E0F0FF0E0DFB3),
+	SPH_C64(0xA5EDEEF83DBCBA51), SPH_C64(0xFC4F0A2A0EA4371E),
+	SPH_C64(0xE83E1DA85CB38429), SPH_C64(0xDC8FF882BA1B1CE2),
+	SPH_C64(0xCD45505E8353E80D), SPH_C64(0x18D19A00D4DB0717),
+	SPH_C64(0x34A0CFEDA5F38101), SPH_C64(0x0BE77E518887CAF2),
+	SPH_C64(0x1E341438B3C45136), SPH_C64(0xE05797F49089CCF9),
+	SPH_C64(0xFFD23F9DF2591D14), SPH_C64(0x543DDA228595C5CD),
+	SPH_C64(0x661F81FD99052A33), SPH_C64(0x8736E641DB0F7B76),
+	SPH_C64(0x15227725418E5307), SPH_C64(0xE25F7F46162EB2FA),
+	SPH_C64(0x48A8B2126C13D9FE), SPH_C64(0xAFDC541792E76EEA),
+	SPH_C64(0x03D912BFC6D1898F), SPH_C64(0x31B1AAFA1B83F51B),
+	SPH_C64(0xF1AC2796E42AB7D9), SPH_C64(0x40A3A7D7FCD2EBAC),
+	SPH_C64(0x1056136D0AFBBCC5), SPH_C64(0x7889E1DD9A6D0C85),
+	SPH_C64(0xD33525782A7974AA), SPH_C64(0xA7E25D09078AC09B),
+	SPH_C64(0xBD4138B3EAC6EDD0), SPH_C64(0x920ABFBE71EB9E70),
+	SPH_C64(0xA2A5D0F54FC2625C), SPH_C64(0xC054E36B0B1290A3),
+	SPH_C64(0xF6DD59FF62FE932B), SPH_C64(0x3537354511A8AC7D),
+	SPH_C64(0xCA845E9172FADCD4), SPH_C64(0x84F82B60329D20DC),
+	SPH_C64(0x79C62CE1CD672F18), SPH_C64(0x8B09A2ADD124642C),
+	SPH_C64(0xD0C1E96A19D9E726), SPH_C64(0x5A786A9B4BA9500C),
+	SPH_C64(0x0E020336634C43F3), SPH_C64(0xC17B474AEB66D822),
+	SPH_C64(0x6A731AE3EC9BAAC2), SPH_C64(0x8226667AE0840258),
+	SPH_C64(0x67D4567691CAECA5), SPH_C64(0x1D94155C4875ADB5),
+	SPH_C64(0x6D00FD985B813FDF), SPH_C64(0x51286EFCB774CD06),
+	SPH_C64(0x5E8834471FA744AF), SPH_C64(0xF72CA0AEE761AE2E),
+	SPH_C64(0xBE40E4CDAEE8E09A), SPH_C64(0xE9970BBB5118F665),
+	SPH_C64(0x726E4BEB33DF1964), SPH_C64(0x703B000729199762),
+	SPH_C64(0x4631D816F5EF30A7), SPH_C64(0xB880B5B51504A6BE),
+	SPH_C64(0x641793C37ED84B6C), SPH_C64(0x7B21ED77F6E97D96),
+	SPH_C64(0x776306312EF96B73), SPH_C64(0xAE528948E86FF3F4),
+	SPH_C64(0x53DBD7F286A3F8F8), SPH_C64(0x16CADCE74CFC1063),
+	SPH_C64(0x005C19BDFA52C6DD), SPH_C64(0x68868F5D64D46AD3),
+	SPH_C64(0x3A9D512CCF1E186A), SPH_C64(0x367E62C2385660AE),
+	SPH_C64(0xE359E7EA77DCB1D7), SPH_C64(0x526C0773749ABE6E),
+	SPH_C64(0x735AE5F9D09F734B), SPH_C64(0x493FC7CC8A558BA8),
+	SPH_C64(0xB0B9C1533041AB45), SPH_C64(0x321958BA470A59BD),
+	SPH_C64(0x852DB00B5F46C393), SPH_C64(0x91209B2BD336B0E5),
+	SPH_C64(0x6E604F7D659EF19F), SPH_C64(0xB99A8AE2782CCB24),
+	SPH_C64(0xCCF52AB6C814C4C7), SPH_C64(0x4727D9AFBE11727B),
+	SPH_C64(0x7E950D0C0121B34D), SPH_C64(0x756F435670AD471F),
+	SPH_C64(0xF5ADD442615A6849), SPH_C64(0x4E87E09980B9957A),
+	SPH_C64(0x2ACFA1DF50AEE355), SPH_C64(0xD898263AFD2FD556),
+	SPH_C64(0xC8F4924DD80C8FD6), SPH_C64(0xCF99CA3D754A173A),
+	SPH_C64(0xFE477BACAF91BF3C), SPH_C64(0xED5371F6D690C12D),
+	SPH_C64(0x831A5C285E687094), SPH_C64(0xC5D3C90A3708A0A4),
+	SPH_C64(0x0F7F903717D06580), SPH_C64(0x19F9BB13B8FDF27F),
+	SPH_C64(0xB1BD6F1B4D502843), SPH_C64(0x1C761BA38FFF4012),
+	SPH_C64(0x0D1530C4E2E21F3B), SPH_C64(0x8943CE69A7372C8A),
+	SPH_C64(0xE5184E11FEB5CE66), SPH_C64(0x618BDB80BD736621),
+	SPH_C64(0x7D29BAD68B574D0B), SPH_C64(0x81BB613E25E6FE5B),
+	SPH_C64(0x071C9C10BC07913F), SPH_C64(0xC7BEEB7909AC2D97),
+	SPH_C64(0xC3E58D353BC5D757), SPH_C64(0xEB017892F38F61E8),
+	SPH_C64(0xD4EFFB9C9B1CC21A), SPH_C64(0x99727D26F494F7AB),
+	SPH_C64(0xA3E063A2956B3E03), SPH_C64(0x9D4A8B9A4AA09C30),
+	SPH_C64(0x3F6AB7D500090FB4), SPH_C64(0x9CC0F2A057268AC0),
+	SPH_C64(0x3DEE9D2DEDBF42D1), SPH_C64(0x330F49C87960A972),
+	SPH_C64(0xC6B2720287421B41), SPH_C64(0x0AC59EC07C00369C),
+	SPH_C64(0xEF4EAC49CB353425), SPH_C64(0xF450244EEF0129D8),
+	SPH_C64(0x8ACC46E5CAF4DEB6), SPH_C64(0x2FFEAB63989263F7),
+	SPH_C64(0x8F7CB9FE5D7A4578), SPH_C64(0x5BD8F7644E634635),
+	SPH_C64(0x427A7315BF2DC900), SPH_C64(0x17D0C4AA2125261C),
+	SPH_C64(0x3992486C93518E50), SPH_C64(0xB4CBFEE0A2D7D4C3),
+	SPH_C64(0x7C75D6202C5DDD8D), SPH_C64(0xDBC295D8E35B6C61),
+	SPH_C64(0x60B369D302032B19), SPH_C64(0xCE42685FDCE44132),
+	SPH_C64(0x06F3DDB9DDF65610), SPH_C64(0x8EA4D21DB5E148F0),
+	SPH_C64(0x20B0FCE62FCD496F), SPH_C64(0x2C1B912358B0EE31),
+	SPH_C64(0xB28317B818F5A308), SPH_C64(0xA89C1E189CA6D2CF),
+	SPH_C64(0x0C6B18576AAADBC8), SPH_C64(0xB65DEAA91299FAE3),
+	SPH_C64(0xFB2B794B7F1027E7), SPH_C64(0x04E4317F443B5BEB),
+	SPH_C64(0x4B852D325939D0A6), SPH_C64(0xD5AE6BEEFB207FFC),
+	SPH_C64(0x309682B281C7D374), SPH_C64(0xBAE309A194C3B475),
+	SPH_C64(0x8CC3F97B13B49F05), SPH_C64(0x98A9422FF8293967),
+	SPH_C64(0x244B16B01076FF7C), SPH_C64(0xF8BF571C663D67EE),
+	SPH_C64(0x1F0D6758EEE30DA1), SPH_C64(0xC9B611D97ADEB9B7),
+	SPH_C64(0xB7AFD5887B6C57A2), SPH_C64(0x6290AE846B984FE1),
+	SPH_C64(0x94DF4CDEACC1A5FD), SPH_C64(0x058A5BD1C5483AFF),
+	SPH_C64(0x63166CC142BA3C37), SPH_C64(0x8DB8526EB2F76F40),
+	SPH_C64(0xE10880036F0D6D4E), SPH_C64(0x9E0523C9971D311D),
+	SPH_C64(0x45EC2824CC7CD691), SPH_C64(0x575B8359E62382C9),
+	SPH_C64(0xFA9E400DC4889995), SPH_C64(0xD1823ECB45721568),
+	SPH_C64(0xDAFD983B8206082F), SPH_C64(0xAA7D29082386A8CB),
+	SPH_C64(0x269FCD4403B87588), SPH_C64(0x1B91F5F728BDD1E0),
+	SPH_C64(0xE4669F39040201F6), SPH_C64(0x7A1D7C218CF04ADE),
+	SPH_C64(0x65623C29D79CE5CE), SPH_C64(0x2368449096C00BB1),
+	SPH_C64(0xAB9BF1879DA503BA), SPH_C64(0xBC23ECB1A458058E),
+	SPH_C64(0x9A58DF01BB401ECC), SPH_C64(0xA070E868A85F143D),
+	SPH_C64(0x4FF188307DF2239E), SPH_C64(0x14D565B41A641183),
+	SPH_C64(0xEE13337452701602), SPH_C64(0x950E3DCF3F285E09),
+	SPH_C64(0x59930254B9C80953), SPH_C64(0x3BF299408930DA6D),
+	SPH_C64(0xA955943F53691387), SPH_C64(0xA15EDECAA9CB8784),
+	SPH_C64(0x29142127352BE9A0), SPH_C64(0x76F0371FFF4E7AFB),
+	SPH_C64(0x0239F450274F2228), SPH_C64(0xBB073AF01D5E868B),
+	SPH_C64(0xBFC80571C10E96C1), SPH_C64(0xD267088568222E23),
+	SPH_C64(0x9671A3D48E80B5B0), SPH_C64(0x55B5D38AE193BB81),
+	SPH_C64(0x693AE2D0A18B04B8), SPH_C64(0x5C48B4ECADD5335F),
+	SPH_C64(0xFD743B194916A1CA), SPH_C64(0x2577018134BE98C4),
+	SPH_C64(0xE77987E83C54A4AD), SPH_C64(0x28E11014DA33E1B9),
+	SPH_C64(0x270CC59E226AA213), SPH_C64(0x71495F756D1A5F60),
+	SPH_C64(0x9BE853FB60AFEF77), SPH_C64(0xADC786A7F7443DBF),
+	SPH_C64(0x0904456173B29A82), SPH_C64(0x58BC7A66C232BD5E),
+	SPH_C64(0xF306558C673AC8B2), SPH_C64(0x41F639C6B6C9772A),
+	SPH_C64(0x216DEFE99FDA35DA), SPH_C64(0x11640CC71C7BE615),
+	SPH_C64(0x93C43694565C5527), SPH_C64(0xEA038E6246777839),
+	SPH_C64(0xF9ABF3CE5A3E2469), SPH_C64(0x741E768D0FD312D2),
+	SPH_C64(0x0144B883CED652C6), SPH_C64(0xC20B5A5BA33F8552),
+	SPH_C64(0x1AE69633C3435A9D), SPH_C64(0x97A28CA4088CFDEC),
+	SPH_C64(0x8824A43C1E96F420), SPH_C64(0x37612FA66EEEA746),
+	SPH_C64(0x6B4CB165F9CF0E5A), SPH_C64(0x43AA1C06A0ABFB4A),
+	SPH_C64(0x7F4DC26FF162796B), SPH_C64(0x6CBACC8E54ED9B0F),
+	SPH_C64(0xA6B7FFEFD2BB253E), SPH_C64(0x2E25BC95B0A29D4F),
+	SPH_C64(0x86D6A58BDEF1388C), SPH_C64(0xDED74AC576B6F054),
+	SPH_C64(0x8030BDBC2B45805D), SPH_C64(0x3C81AF70E94D9289),
+	SPH_C64(0x3EFF6DDA9E3100DB), SPH_C64(0xB38DC39FDFCC8847),
+	SPH_C64(0x123885528D17B87E), SPH_C64(0xF2DA0ED240B1B642),
+	SPH_C64(0x44CEFADCD54BF9A9), SPH_C64(0x1312200E433C7EE6),
+	SPH_C64(0x9FFCC84F3A78C748), SPH_C64(0xF0CD1F72248576BB),
+	SPH_C64(0xEC6974053638CFE4), SPH_C64(0x2BA7B67C0CEC4E4C),
+	SPH_C64(0xAC2F4DF3E5CE32ED), SPH_C64(0xCB33D14326EA4C11),
+	SPH_C64(0xA4E9044CC77E58BC), SPH_C64(0x5F513293D934FCEF),
+	SPH_C64(0x5DC9645506E55444), SPH_C64(0x50DE418F317DE40A),
+	SPH_C64(0x388CB31A69DDE259), SPH_C64(0x2DB4A83455820A86),
+	SPH_C64(0x9010A91E84711AE9), SPH_C64(0x4DF7F0B7B1498371),
+	SPH_C64(0xD62A2EABC0977179), SPH_C64(0x22FAC097AA8D5C0E)
+};
+
+static const uint64_t cpu_T3[256] = { // host-side table of 256 64-bit constants; presumably uploaded to __constant__ T3 (Tiger S-box 3) -- TODO confirm
+	SPH_C64(0xF49FCC2FF1DAF39B), SPH_C64(0x487FD5C66FF29281),
+	SPH_C64(0xE8A30667FCDCA83F), SPH_C64(0x2C9B4BE3D2FCCE63),
+	SPH_C64(0xDA3FF74B93FBBBC2), SPH_C64(0x2FA165D2FE70BA66),
+	SPH_C64(0xA103E279970E93D4), SPH_C64(0xBECDEC77B0E45E71),
+	SPH_C64(0xCFB41E723985E497), SPH_C64(0xB70AAA025EF75017),
+	SPH_C64(0xD42309F03840B8E0), SPH_C64(0x8EFC1AD035898579),
+	SPH_C64(0x96C6920BE2B2ABC5), SPH_C64(0x66AF4163375A9172),
+	SPH_C64(0x2174ABDCCA7127FB), SPH_C64(0xB33CCEA64A72FF41),
+	SPH_C64(0xF04A4933083066A5), SPH_C64(0x8D970ACDD7289AF5),
+	SPH_C64(0x8F96E8E031C8C25E), SPH_C64(0xF3FEC02276875D47),
+	SPH_C64(0xEC7BF310056190DD), SPH_C64(0xF5ADB0AEBB0F1491),
+	SPH_C64(0x9B50F8850FD58892), SPH_C64(0x4975488358B74DE8),
+	SPH_C64(0xA3354FF691531C61), SPH_C64(0x0702BBE481D2C6EE),
+	SPH_C64(0x89FB24057DEDED98), SPH_C64(0xAC3075138596E902),
+	SPH_C64(0x1D2D3580172772ED), SPH_C64(0xEB738FC28E6BC30D),
+	SPH_C64(0x5854EF8F63044326), SPH_C64(0x9E5C52325ADD3BBE),
+	SPH_C64(0x90AA53CF325C4623), SPH_C64(0xC1D24D51349DD067),
+	SPH_C64(0x2051CFEEA69EA624), SPH_C64(0x13220F0A862E7E4F),
+	SPH_C64(0xCE39399404E04864), SPH_C64(0xD9C42CA47086FCB7),
+	SPH_C64(0x685AD2238A03E7CC), SPH_C64(0x066484B2AB2FF1DB),
+	SPH_C64(0xFE9D5D70EFBF79EC), SPH_C64(0x5B13B9DD9C481854),
+	SPH_C64(0x15F0D475ED1509AD), SPH_C64(0x0BEBCD060EC79851),
+	SPH_C64(0xD58C6791183AB7F8), SPH_C64(0xD1187C5052F3EEE4),
+	SPH_C64(0xC95D1192E54E82FF), SPH_C64(0x86EEA14CB9AC6CA2),
+	SPH_C64(0x3485BEB153677D5D), SPH_C64(0xDD191D781F8C492A),
+	SPH_C64(0xF60866BAA784EBF9), SPH_C64(0x518F643BA2D08C74),
+	SPH_C64(0x8852E956E1087C22), SPH_C64(0xA768CB8DC410AE8D),
+	SPH_C64(0x38047726BFEC8E1A), SPH_C64(0xA67738B4CD3B45AA),
+	SPH_C64(0xAD16691CEC0DDE19), SPH_C64(0xC6D4319380462E07),
+	SPH_C64(0xC5A5876D0BA61938), SPH_C64(0x16B9FA1FA58FD840),
+	SPH_C64(0x188AB1173CA74F18), SPH_C64(0xABDA2F98C99C021F),
+	SPH_C64(0x3E0580AB134AE816), SPH_C64(0x5F3B05B773645ABB),
+	SPH_C64(0x2501A2BE5575F2F6), SPH_C64(0x1B2F74004E7E8BA9),
+	SPH_C64(0x1CD7580371E8D953), SPH_C64(0x7F6ED89562764E30),
+	SPH_C64(0xB15926FF596F003D), SPH_C64(0x9F65293DA8C5D6B9),
+	SPH_C64(0x6ECEF04DD690F84C), SPH_C64(0x4782275FFF33AF88),
+	SPH_C64(0xE41433083F820801), SPH_C64(0xFD0DFE409A1AF9B5),
+	SPH_C64(0x4325A3342CDB396B), SPH_C64(0x8AE77E62B301B252),
+	SPH_C64(0xC36F9E9F6655615A), SPH_C64(0x85455A2D92D32C09),
+	SPH_C64(0xF2C7DEA949477485), SPH_C64(0x63CFB4C133A39EBA),
+	SPH_C64(0x83B040CC6EBC5462), SPH_C64(0x3B9454C8FDB326B0),
+	SPH_C64(0x56F56A9E87FFD78C), SPH_C64(0x2DC2940D99F42BC6),
+	SPH_C64(0x98F7DF096B096E2D), SPH_C64(0x19A6E01E3AD852BF),
+	SPH_C64(0x42A99CCBDBD4B40B), SPH_C64(0xA59998AF45E9C559),
+	SPH_C64(0x366295E807D93186), SPH_C64(0x6B48181BFAA1F773),
+	SPH_C64(0x1FEC57E2157A0A1D), SPH_C64(0x4667446AF6201AD5),
+	SPH_C64(0xE615EBCACFB0F075), SPH_C64(0xB8F31F4F68290778),
+	SPH_C64(0x22713ED6CE22D11E), SPH_C64(0x3057C1A72EC3C93B),
+	SPH_C64(0xCB46ACC37C3F1F2F), SPH_C64(0xDBB893FD02AAF50E),
+	SPH_C64(0x331FD92E600B9FCF), SPH_C64(0xA498F96148EA3AD6),
+	SPH_C64(0xA8D8426E8B6A83EA), SPH_C64(0xA089B274B7735CDC),
+	SPH_C64(0x87F6B3731E524A11), SPH_C64(0x118808E5CBC96749),
+	SPH_C64(0x9906E4C7B19BD394), SPH_C64(0xAFED7F7E9B24A20C),
+	SPH_C64(0x6509EADEEB3644A7), SPH_C64(0x6C1EF1D3E8EF0EDE),
+	SPH_C64(0xB9C97D43E9798FB4), SPH_C64(0xA2F2D784740C28A3),
+	SPH_C64(0x7B8496476197566F), SPH_C64(0x7A5BE3E6B65F069D),
+	SPH_C64(0xF96330ED78BE6F10), SPH_C64(0xEEE60DE77A076A15),
+	SPH_C64(0x2B4BEE4AA08B9BD0), SPH_C64(0x6A56A63EC7B8894E),
+	SPH_C64(0x02121359BA34FEF4), SPH_C64(0x4CBF99F8283703FC),
+	SPH_C64(0x398071350CAF30C8), SPH_C64(0xD0A77A89F017687A),
+	SPH_C64(0xF1C1A9EB9E423569), SPH_C64(0x8C7976282DEE8199),
+	SPH_C64(0x5D1737A5DD1F7ABD), SPH_C64(0x4F53433C09A9FA80),
+	SPH_C64(0xFA8B0C53DF7CA1D9), SPH_C64(0x3FD9DCBC886CCB77),
+	SPH_C64(0xC040917CA91B4720), SPH_C64(0x7DD00142F9D1DCDF),
+	SPH_C64(0x8476FC1D4F387B58), SPH_C64(0x23F8E7C5F3316503),
+	SPH_C64(0x032A2244E7E37339), SPH_C64(0x5C87A5D750F5A74B),
+	SPH_C64(0x082B4CC43698992E), SPH_C64(0xDF917BECB858F63C),
+	SPH_C64(0x3270B8FC5BF86DDA), SPH_C64(0x10AE72BB29B5DD76),
+	SPH_C64(0x576AC94E7700362B), SPH_C64(0x1AD112DAC61EFB8F),
+	SPH_C64(0x691BC30EC5FAA427), SPH_C64(0xFF246311CC327143),
+	SPH_C64(0x3142368E30E53206), SPH_C64(0x71380E31E02CA396),
+	SPH_C64(0x958D5C960AAD76F1), SPH_C64(0xF8D6F430C16DA536),
+	SPH_C64(0xC8FFD13F1BE7E1D2), SPH_C64(0x7578AE66004DDBE1),
+	SPH_C64(0x05833F01067BE646), SPH_C64(0xBB34B5AD3BFE586D),
+	SPH_C64(0x095F34C9A12B97F0), SPH_C64(0x247AB64525D60CA8),
+	SPH_C64(0xDCDBC6F3017477D1), SPH_C64(0x4A2E14D4DECAD24D),
+	SPH_C64(0xBDB5E6D9BE0A1EEB), SPH_C64(0x2A7E70F7794301AB),
+	SPH_C64(0xDEF42D8A270540FD), SPH_C64(0x01078EC0A34C22C1),
+	SPH_C64(0xE5DE511AF4C16387), SPH_C64(0x7EBB3A52BD9A330A),
+	SPH_C64(0x77697857AA7D6435), SPH_C64(0x004E831603AE4C32),
+	SPH_C64(0xE7A21020AD78E312), SPH_C64(0x9D41A70C6AB420F2),
+	SPH_C64(0x28E06C18EA1141E6), SPH_C64(0xD2B28CBD984F6B28),
+	SPH_C64(0x26B75F6C446E9D83), SPH_C64(0xBA47568C4D418D7F),
+	SPH_C64(0xD80BADBFE6183D8E), SPH_C64(0x0E206D7F5F166044),
+	SPH_C64(0xE258A43911CBCA3E), SPH_C64(0x723A1746B21DC0BC),
+	SPH_C64(0xC7CAA854F5D7CDD3), SPH_C64(0x7CAC32883D261D9C),
+	SPH_C64(0x7690C26423BA942C), SPH_C64(0x17E55524478042B8),
+	SPH_C64(0xE0BE477656A2389F), SPH_C64(0x4D289B5E67AB2DA0),
+	SPH_C64(0x44862B9C8FBBFD31), SPH_C64(0xB47CC8049D141365),
+	SPH_C64(0x822C1B362B91C793), SPH_C64(0x4EB14655FB13DFD8),
+	SPH_C64(0x1ECBBA0714E2A97B), SPH_C64(0x6143459D5CDE5F14),
+	SPH_C64(0x53A8FBF1D5F0AC89), SPH_C64(0x97EA04D81C5E5B00),
+	SPH_C64(0x622181A8D4FDB3F3), SPH_C64(0xE9BCD341572A1208),
+	SPH_C64(0x1411258643CCE58A), SPH_C64(0x9144C5FEA4C6E0A4),
+	SPH_C64(0x0D33D06565CF620F), SPH_C64(0x54A48D489F219CA1),
+	SPH_C64(0xC43E5EAC6D63C821), SPH_C64(0xA9728B3A72770DAF),
+	SPH_C64(0xD7934E7B20DF87EF), SPH_C64(0xE35503B61A3E86E5),
+	SPH_C64(0xCAE321FBC819D504), SPH_C64(0x129A50B3AC60BFA6),
+	SPH_C64(0xCD5E68EA7E9FB6C3), SPH_C64(0xB01C90199483B1C7),
+	SPH_C64(0x3DE93CD5C295376C), SPH_C64(0xAED52EDF2AB9AD13),
+	SPH_C64(0x2E60F512C0A07884), SPH_C64(0xBC3D86A3E36210C9),
+	SPH_C64(0x35269D9B163951CE), SPH_C64(0x0C7D6E2AD0CDB5FA),
+	SPH_C64(0x59E86297D87F5733), SPH_C64(0x298EF221898DB0E7),
+	SPH_C64(0x55000029D1A5AA7E), SPH_C64(0x8BC08AE1B5061B45),
+	SPH_C64(0xC2C31C2B6C92703A), SPH_C64(0x94CC596BAF25EF42),
+	SPH_C64(0x0A1D73DB22540456), SPH_C64(0x04B6A0F9D9C4179A),
+	SPH_C64(0xEFFDAFA2AE3D3C60), SPH_C64(0xF7C8075BB49496C4),
+	SPH_C64(0x9CC5C7141D1CD4E3), SPH_C64(0x78BD1638218E5534),
+	SPH_C64(0xB2F11568F850246A), SPH_C64(0xEDFABCFA9502BC29),
+	SPH_C64(0x796CE5F2DA23051B), SPH_C64(0xAAE128B0DC93537C),
+	SPH_C64(0x3A493DA0EE4B29AE), SPH_C64(0xB5DF6B2C416895D7),
+	SPH_C64(0xFCABBD25122D7F37), SPH_C64(0x70810B58105DC4B1),
+	SPH_C64(0xE10FDD37F7882A90), SPH_C64(0x524DCAB5518A3F5C),
+	SPH_C64(0x3C9E85878451255B), SPH_C64(0x4029828119BD34E2),
+	SPH_C64(0x74A05B6F5D3CECCB), SPH_C64(0xB610021542E13ECA),
+	SPH_C64(0x0FF979D12F59E2AC), SPH_C64(0x6037DA27E4F9CC50),
+	SPH_C64(0x5E92975A0DF1847D), SPH_C64(0xD66DE190D3E623FE),
+	SPH_C64(0x5032D6B87B568048), SPH_C64(0x9A36B7CE8235216E),
+	SPH_C64(0x80272A7A24F64B4A), SPH_C64(0x93EFED8B8C6916F7),
+	SPH_C64(0x37DDBFF44CCE1555), SPH_C64(0x4B95DB5D4B99BD25),
+	SPH_C64(0x92D3FDA169812FC0), SPH_C64(0xFB1A4A9A90660BB6),
+	SPH_C64(0x730C196946A4B9B2), SPH_C64(0x81E289AA7F49DA68),
+	SPH_C64(0x64669A0F83B1A05F), SPH_C64(0x27B3FF7D9644F48B),
+	SPH_C64(0xCC6B615C8DB675B3), SPH_C64(0x674F20B9BCEBBE95),
+	SPH_C64(0x6F31238275655982), SPH_C64(0x5AE488713E45CF05),
+	SPH_C64(0xBF619F9954C21157), SPH_C64(0xEABAC46040A8EAE9),
+	SPH_C64(0x454C6FE9F2C0C1CD), SPH_C64(0x419CF6496412691C),
+	SPH_C64(0xD3DC3BEF265B0F70), SPH_C64(0x6D0E60F5C3578A9E)
+};
+
+static const uint64_t cpu_T4[256] = { // host-side table of 256 64-bit constants; presumably uploaded to __constant__ T4 (Tiger S-box 4) -- TODO confirm
+	SPH_C64(0x5B0E608526323C55), SPH_C64(0x1A46C1A9FA1B59F5),
+	SPH_C64(0xA9E245A17C4C8FFA), SPH_C64(0x65CA5159DB2955D7),
+	SPH_C64(0x05DB0A76CE35AFC2), SPH_C64(0x81EAC77EA9113D45),
+	SPH_C64(0x528EF88AB6AC0A0D), SPH_C64(0xA09EA253597BE3FF),
+	SPH_C64(0x430DDFB3AC48CD56), SPH_C64(0xC4B3A67AF45CE46F),
+	SPH_C64(0x4ECECFD8FBE2D05E), SPH_C64(0x3EF56F10B39935F0),
+	SPH_C64(0x0B22D6829CD619C6), SPH_C64(0x17FD460A74DF2069),
+	SPH_C64(0x6CF8CC8E8510ED40), SPH_C64(0xD6C824BF3A6ECAA7),
+	SPH_C64(0x61243D581A817049), SPH_C64(0x048BACB6BBC163A2),
+	SPH_C64(0xD9A38AC27D44CC32), SPH_C64(0x7FDDFF5BAAF410AB),
+	SPH_C64(0xAD6D495AA804824B), SPH_C64(0xE1A6A74F2D8C9F94),
+	SPH_C64(0xD4F7851235DEE8E3), SPH_C64(0xFD4B7F886540D893),
+	SPH_C64(0x247C20042AA4BFDA), SPH_C64(0x096EA1C517D1327C),
+	SPH_C64(0xD56966B4361A6685), SPH_C64(0x277DA5C31221057D),
+	SPH_C64(0x94D59893A43ACFF7), SPH_C64(0x64F0C51CCDC02281),
+	SPH_C64(0x3D33BCC4FF6189DB), SPH_C64(0xE005CB184CE66AF1),
+	SPH_C64(0xFF5CCD1D1DB99BEA), SPH_C64(0xB0B854A7FE42980F),
+	SPH_C64(0x7BD46A6A718D4B9F), SPH_C64(0xD10FA8CC22A5FD8C),
+	SPH_C64(0xD31484952BE4BD31), SPH_C64(0xC7FA975FCB243847),
+	SPH_C64(0x4886ED1E5846C407), SPH_C64(0x28CDDB791EB70B04),
+	SPH_C64(0xC2B00BE2F573417F), SPH_C64(0x5C9590452180F877),
+	SPH_C64(0x7A6BDDFFF370EB00), SPH_C64(0xCE509E38D6D9D6A4),
+	SPH_C64(0xEBEB0F00647FA702), SPH_C64(0x1DCC06CF76606F06),
+	SPH_C64(0xE4D9F28BA286FF0A), SPH_C64(0xD85A305DC918C262),
+	SPH_C64(0x475B1D8732225F54), SPH_C64(0x2D4FB51668CCB5FE),
+	SPH_C64(0xA679B9D9D72BBA20), SPH_C64(0x53841C0D912D43A5),
+	SPH_C64(0x3B7EAA48BF12A4E8), SPH_C64(0x781E0E47F22F1DDF),
+	SPH_C64(0xEFF20CE60AB50973), SPH_C64(0x20D261D19DFFB742),
+	SPH_C64(0x16A12B03062A2E39), SPH_C64(0x1960EB2239650495),
+	SPH_C64(0x251C16FED50EB8B8), SPH_C64(0x9AC0C330F826016E),
+	SPH_C64(0xED152665953E7671), SPH_C64(0x02D63194A6369570),
+	SPH_C64(0x5074F08394B1C987), SPH_C64(0x70BA598C90B25CE1),
+	SPH_C64(0x794A15810B9742F6), SPH_C64(0x0D5925E9FCAF8C6C),
+	SPH_C64(0x3067716CD868744E), SPH_C64(0x910AB077E8D7731B),
+	SPH_C64(0x6A61BBDB5AC42F61), SPH_C64(0x93513EFBF0851567),
+	SPH_C64(0xF494724B9E83E9D5), SPH_C64(0xE887E1985C09648D),
+	SPH_C64(0x34B1D3C675370CFD), SPH_C64(0xDC35E433BC0D255D),
+	SPH_C64(0xD0AAB84234131BE0), SPH_C64(0x08042A50B48B7EAF),
+	SPH_C64(0x9997C4EE44A3AB35), SPH_C64(0x829A7B49201799D0),
+	SPH_C64(0x263B8307B7C54441), SPH_C64(0x752F95F4FD6A6CA6),
+	SPH_C64(0x927217402C08C6E5), SPH_C64(0x2A8AB754A795D9EE),
+	SPH_C64(0xA442F7552F72943D), SPH_C64(0x2C31334E19781208),
+	SPH_C64(0x4FA98D7CEAEE6291), SPH_C64(0x55C3862F665DB309),
+	SPH_C64(0xBD0610175D53B1F3), SPH_C64(0x46FE6CB840413F27),
+	SPH_C64(0x3FE03792DF0CFA59), SPH_C64(0xCFE700372EB85E8F),
+	SPH_C64(0xA7BE29E7ADBCE118), SPH_C64(0xE544EE5CDE8431DD),
+	SPH_C64(0x8A781B1B41F1873E), SPH_C64(0xA5C94C78A0D2F0E7),
+	SPH_C64(0x39412E2877B60728), SPH_C64(0xA1265EF3AFC9A62C),
+	SPH_C64(0xBCC2770C6A2506C5), SPH_C64(0x3AB66DD5DCE1CE12),
+	SPH_C64(0xE65499D04A675B37), SPH_C64(0x7D8F523481BFD216),
+	SPH_C64(0x0F6F64FCEC15F389), SPH_C64(0x74EFBE618B5B13C8),
+	SPH_C64(0xACDC82B714273E1D), SPH_C64(0xDD40BFE003199D17),
+	SPH_C64(0x37E99257E7E061F8), SPH_C64(0xFA52626904775AAA),
+	SPH_C64(0x8BBBF63A463D56F9), SPH_C64(0xF0013F1543A26E64),
+	SPH_C64(0xA8307E9F879EC898), SPH_C64(0xCC4C27A4150177CC),
+	SPH_C64(0x1B432F2CCA1D3348), SPH_C64(0xDE1D1F8F9F6FA013),
+	SPH_C64(0x606602A047A7DDD6), SPH_C64(0xD237AB64CC1CB2C7),
+	SPH_C64(0x9B938E7225FCD1D3), SPH_C64(0xEC4E03708E0FF476),
+	SPH_C64(0xFEB2FBDA3D03C12D), SPH_C64(0xAE0BCED2EE43889A),
+	SPH_C64(0x22CB8923EBFB4F43), SPH_C64(0x69360D013CF7396D),
+	SPH_C64(0x855E3602D2D4E022), SPH_C64(0x073805BAD01F784C),
+	SPH_C64(0x33E17A133852F546), SPH_C64(0xDF4874058AC7B638),
+	SPH_C64(0xBA92B29C678AA14A), SPH_C64(0x0CE89FC76CFAADCD),
+	SPH_C64(0x5F9D4E0908339E34), SPH_C64(0xF1AFE9291F5923B9),
+	SPH_C64(0x6E3480F60F4A265F), SPH_C64(0xEEBF3A2AB29B841C),
+	SPH_C64(0xE21938A88F91B4AD), SPH_C64(0x57DFEFF845C6D3C3),
+	SPH_C64(0x2F006B0BF62CAAF2), SPH_C64(0x62F479EF6F75EE78),
+	SPH_C64(0x11A55AD41C8916A9), SPH_C64(0xF229D29084FED453),
+	SPH_C64(0x42F1C27B16B000E6), SPH_C64(0x2B1F76749823C074),
+	SPH_C64(0x4B76ECA3C2745360), SPH_C64(0x8C98F463B91691BD),
+	SPH_C64(0x14BCC93CF1ADE66A), SPH_C64(0x8885213E6D458397),
+	SPH_C64(0x8E177DF0274D4711), SPH_C64(0xB49B73B5503F2951),
+	SPH_C64(0x10168168C3F96B6B), SPH_C64(0x0E3D963B63CAB0AE),
+	SPH_C64(0x8DFC4B5655A1DB14), SPH_C64(0xF789F1356E14DE5C),
+	SPH_C64(0x683E68AF4E51DAC1), SPH_C64(0xC9A84F9D8D4B0FD9),
+	SPH_C64(0x3691E03F52A0F9D1), SPH_C64(0x5ED86E46E1878E80),
+	SPH_C64(0x3C711A0E99D07150), SPH_C64(0x5A0865B20C4E9310),
+	SPH_C64(0x56FBFC1FE4F0682E), SPH_C64(0xEA8D5DE3105EDF9B),
+	SPH_C64(0x71ABFDB12379187A), SPH_C64(0x2EB99DE1BEE77B9C),
+	SPH_C64(0x21ECC0EA33CF4523), SPH_C64(0x59A4D7521805C7A1),
+	SPH_C64(0x3896F5EB56AE7C72), SPH_C64(0xAA638F3DB18F75DC),
+	SPH_C64(0x9F39358DABE9808E), SPH_C64(0xB7DEFA91C00B72AC),
+	SPH_C64(0x6B5541FD62492D92), SPH_C64(0x6DC6DEE8F92E4D5B),
+	SPH_C64(0x353F57ABC4BEEA7E), SPH_C64(0x735769D6DA5690CE),
+	SPH_C64(0x0A234AA642391484), SPH_C64(0xF6F9508028F80D9D),
+	SPH_C64(0xB8E319A27AB3F215), SPH_C64(0x31AD9C1151341A4D),
+	SPH_C64(0x773C22A57BEF5805), SPH_C64(0x45C7561A07968633),
+	SPH_C64(0xF913DA9E249DBE36), SPH_C64(0xDA652D9B78A64C68),
+	SPH_C64(0x4C27A97F3BC334EF), SPH_C64(0x76621220E66B17F4),
+	SPH_C64(0x967743899ACD7D0B), SPH_C64(0xF3EE5BCAE0ED6782),
+	SPH_C64(0x409F753600C879FC), SPH_C64(0x06D09A39B5926DB6),
+	SPH_C64(0x6F83AEB0317AC588), SPH_C64(0x01E6CA4A86381F21),
+	SPH_C64(0x66FF3462D19F3025), SPH_C64(0x72207C24DDFD3BFB),
+	SPH_C64(0x4AF6B6D3E2ECE2EB), SPH_C64(0x9C994DBEC7EA08DE),
+	SPH_C64(0x49ACE597B09A8BC4), SPH_C64(0xB38C4766CF0797BA),
+	SPH_C64(0x131B9373C57C2A75), SPH_C64(0xB1822CCE61931E58),
+	SPH_C64(0x9D7555B909BA1C0C), SPH_C64(0x127FAFDD937D11D2),
+	SPH_C64(0x29DA3BADC66D92E4), SPH_C64(0xA2C1D57154C2ECBC),
+	SPH_C64(0x58C5134D82F6FE24), SPH_C64(0x1C3AE3515B62274F),
+	SPH_C64(0xE907C82E01CB8126), SPH_C64(0xF8ED091913E37FCB),
+	SPH_C64(0x3249D8F9C80046C9), SPH_C64(0x80CF9BEDE388FB63),
+	SPH_C64(0x1881539A116CF19E), SPH_C64(0x5103F3F76BD52457),
+	SPH_C64(0x15B7E6F5AE47F7A8), SPH_C64(0xDBD7C6DED47E9CCF),
+	SPH_C64(0x44E55C410228BB1A), SPH_C64(0xB647D4255EDB4E99),
+	SPH_C64(0x5D11882BB8AAFC30), SPH_C64(0xF5098BBB29D3212A),
+	SPH_C64(0x8FB5EA14E90296B3), SPH_C64(0x677B942157DD025A),
+	SPH_C64(0xFB58E7C0A390ACB5), SPH_C64(0x89D3674C83BD4A01),
+	SPH_C64(0x9E2DA4DF4BF3B93B), SPH_C64(0xFCC41E328CAB4829),
+	SPH_C64(0x03F38C96BA582C52), SPH_C64(0xCAD1BDBD7FD85DB2),
+	SPH_C64(0xBBB442C16082AE83), SPH_C64(0xB95FE86BA5DA9AB0),
+	SPH_C64(0xB22E04673771A93F), SPH_C64(0x845358C9493152D8),
+	SPH_C64(0xBE2A488697B4541E), SPH_C64(0x95A2DC2DD38E6966),
+	SPH_C64(0xC02C11AC923C852B), SPH_C64(0x2388B1990DF2A87B),
+	SPH_C64(0x7C8008FA1B4F37BE), SPH_C64(0x1F70D0C84D54E503),
+	SPH_C64(0x5490ADEC7ECE57D4), SPH_C64(0x002B3C27D9063A3A),
+	SPH_C64(0x7EAEA3848030A2BF), SPH_C64(0xC602326DED2003C0),
+	SPH_C64(0x83A7287D69A94086), SPH_C64(0xC57A5FCB30F57A8A),
+	SPH_C64(0xB56844E479EBE779), SPH_C64(0xA373B40F05DCBCE9),
+	SPH_C64(0xD71A786E88570EE2), SPH_C64(0x879CBACDBDE8F6A0),
+	SPH_C64(0x976AD1BCC164A32F), SPH_C64(0xAB21E25E9666D78B),
+	SPH_C64(0x901063AAE5E5C33C), SPH_C64(0x9818B34448698D90),
+	SPH_C64(0xE36487AE3E1E8ABB), SPH_C64(0xAFBDF931893BDCB4),
+	SPH_C64(0x6345A0DC5FBBD519), SPH_C64(0x8628FE269B9465CA),
+	SPH_C64(0x1E5D01603F9C51EC), SPH_C64(0x4DE44006A15049B7),
+	SPH_C64(0xBF6C70E5F776CBB1), SPH_C64(0x411218F2EF552BED),
+	SPH_C64(0xCB0C0708705A36A3), SPH_C64(0xE74D14754F986044),
+	SPH_C64(0xCD56D9430EA8280E), SPH_C64(0xC12591D7535F5065),
+	SPH_C64(0xC83223F1720AEF96), SPH_C64(0xC3A0396F7363A51F)
+};
+/* PASS: eight ROUND steps, one per key word X0..X7, cycling the roles of a, b and c; `mul` is the multiplier macro handed on to ROUND. X0..X7 and ROUND must be in scope where PASS is expanded. */
+#define PASS(a, b, c, mul)    { \
+		ROUND(a, b, c, X0, mul); \
+		ROUND(b, c, a, X1, mul); \
+		ROUND(c, a, b, X2, mul); \
+		ROUND(a, b, c, X3, mul); \
+		ROUND(b, c, a, X4, mul); \
+		ROUND(c, a, b, X5, mul); \
+		ROUND(a, b, c, X6, mul); \
+		ROUND(b, c, a, X7, mul); \
+	} 
+/* Multipliers applied to `b` at the end of every ROUND; TIGER_ROUND_BODY uses 5, 7 and 9 for its three passes. */
+#define MUL5(x)   SPH_T64((x) * SPH_C64(5))
+#define MUL7(x)   SPH_T64((x) * SPH_C64(7))
+#define MUL9(x)   SPH_T64((x) * SPH_C64(9))
+/* Disabled variant without the SPH_T64 wrapper, kept for reference:
+#define MUL5(x)   ((x) * SPH_C64(5))
+#define MUL7(x)   ((x) * SPH_C64(7))
+#define MUL9(x)   ((x) * SPH_C64(9))
+*/
+/* KSCHED: in-place key schedule that remixes the eight words X0..X7 between passes; TIGER_ROUND_BODY declares X0..X7 and expands this twice. */
+#define KSCHED    { \
+		X0 = SPH_T64(X0 - (X7 ^ SPH_C64(0xA5A5A5A5A5A5A5A5))); \
+		X1 ^= X0; \
+		X2 = SPH_T64(X2 + X1); \
+		X3 = SPH_T64(X3 - (X2 ^ (~X1 << 19))); \
+		X4 ^= X3; \
+		X5 = SPH_T64(X5 + X4); \
+		X6 = SPH_T64(X6 - (X5 ^ (~X4 >> 23))); \
+		X7 ^= X6; \
+		X0 = SPH_T64(X0 + X7); \
+		X1 = SPH_T64(X1 - (X0 ^ (~X7 << 19))); \
+		X2 ^= X1; \
+		X3 = SPH_T64(X3 + X2); \
+		X4 = SPH_T64(X4 - (X3 ^ (~X2 >> 23))); \
+		X5 ^= X4; \
+		X6 = SPH_T64(X6 + X5); \
+		X7 = SPH_T64(X7 - (X6 ^ SPH_C64(0x0123456789ABCDEF))); \
+	} 
+/* TIGER_ROUND_BODY(in, r): compresses the eight 64-bit words of `in` into the three-word state `r`, updating r in place. Uses whichever ROUND and MUL5/MUL7/MUL9 definitions are active where it is expanded. */
+#define TIGER_ROUND_BODY(in, r)    { \
+		uint64_t A, B, C; \
+		uint64_t X0, X1, X2, X3, X4, X5, X6, X7; \
+ /* working copies of the incoming state */ \
+		A = (r)[0]; \
+		B = (r)[1]; \
+		C = (r)[2]; \
+ /* working copies of the input words; KSCHED rewrites them between passes */ \
+		X0 = (in[0]); \
+		X1 = (in[1]); \
+		X2 = (in[2]); \
+		X3 = (in[3]); \
+		X4 = (in[4]); \
+		X5 = (in[5]); \
+		X6 = (in[6]); \
+		X7 = (in[7]); \
+		PASS(A, B, C, MUL5); \
+		KSCHED; \
+		PASS(C, A, B, MUL7); \
+		KSCHED; \
+		PASS(B, C, A, MUL9); \
+ /* feed-forward: combine the pass results with the state held in r */ \
+		(r)[0] ^= A; \
+		(r)[1] = SPH_T64(B - (r)[1]); \
+		(r)[2] = SPH_T64(C + (r)[2]); \
+	} 
+
+
+/**
+ * Tiger-192 stage of the M7 hash: one thread hashes one nonce.
+ *
+ * Each thread resumes from the host-precomputed state in bufo (written by
+ * tiger192_setBlock_120), absorbs words 8..15 of c_PaddedMessage80 with its own
+ * nonce patched in, and then absorbs a last block that is all zero except for
+ * 0x3d0 in its final word (976 = 122 * 8, presumably the message length in bits).
+ *
+ * Launch: 1-D grid and block of any size; uses 8 KiB of static shared memory.
+ * Requires T1..T4 to have been uploaded beforehand (see tiger192_cpu_init).
+ * Preprocessor side effect: defines ROUND, which stays defined after the kernel.
+ *
+ * @param threads      number of nonces to hash; threads with a larger global index
+ *                     only help stage the tables and write nothing
+ * @param startNounce  nonce of thread 0; thread t hashes startNounce + t
+ * @param outputHash   device buffer of 3 * threads words; word i of thread t goes to [i * threads + t]
+ */
+__global__ void m7_tiger192_gpu_hash_120(int threads, uint32_t startNounce, uint64_t *outputHash)
+{
+	// Shared copy of the four 256-entry tables, back to back: T1 | T2 | T3 | T4.
+	__shared__ uint64_t sharedMem[1024];
+
+	// Block-stride copy so that all 1024 entries are filled for any block size; a plain
+	// `threadIdx.x < 256` guard leaves entries unset whenever blockDim.x is below 256.
+	for (int i = threadIdx.x; i < 256; i += blockDim.x)
+	{
+		sharedMem[i]       = T1[i];
+		sharedMem[i + 256] = T2[i];
+		sharedMem[i + 512] = T3[i];
+		sharedMem[i + 768] = T4[i];
+	}
+	__syncthreads(); // reached by every thread of the block, before any table lookup
+
+/* Device variant of the Tiger round: the table lookups read the shared-memory copy. */
+#define ROUND(a, b, c, x, mul)    { \
+		c ^= x; \
+		a = SPH_T64(a - (sharedMem[c & 0xFF] ^ sharedMem[((c >> 16) & 0xFF)+256] \
+			      ^ sharedMem[((c >> 32) & 0xFF)+512] ^ sharedMem[((c >> 48) & 0xFF)+768])); \
+		b = SPH_T64(b + (sharedMem[((c >> 8) & 0xFF)+768] ^ sharedMem[((c >> 24) & 0xFF)+512] \
+			^ sharedMem[((c >> 40) & 0xFF)+256] ^ sharedMem[(c >> 56) & 0xFF])); \
+		b = mul(b); \
+	} 
+
+	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	if (thread < threads)
+	{
+		uint32_t nounce = startNounce + thread;
+		uint64_t in2[8], in3[8], buf[3];
+
+		// Second message block: words 8..15 of the padded header.
+		#pragma unroll 8
+		for (int i = 0; i < 8; i++) { in2[i] = c_PaddedMessage80[i + 8]; }
+		// Mess views in2 as sixteen 32-bit words;
+		// word 13 is the slot that receives this thread's nonce.
+		uint32_t *Mess = (uint32_t*)in2;
+		Mess[13] = nounce;
+
+		// Last block: zeros, with 0x3d0 in the final word.
+		#pragma unroll 7
+		for (int i = 0; i < 7; i++) { in3[i] = 0; }
+		in3[7] = 0x3d0;
+
+		// Resume from the state the host computed for the first block.
+		#pragma unroll 3
+		for (int i = 0; i < 3; i++) { buf[i] = bufo[i]; }
+
+		TIGER_ROUND_BODY(in2, buf);
+		TIGER_ROUND_BODY(in3, buf);
+
+		// Word-major layout: for a given i, adjacent threads write adjacent words.
+		// The size_t cast keeps i * threads from overflowing int.
+		#pragma unroll 3
+		for (int i = 0; i < 3; i++) { outputHash[(size_t)i * threads + thread] = buf[i]; }
+	}
+}
+
+
+/**
+ * Uploads III (presumably the Tiger initial state) to gpu_III and the host tables
+ * cpu_T1..cpu_T4 to the device symbols T1..T4. thr_id and threads are not used.
+ */
+void tiger192_cpu_init(int thr_id, int threads)
+{
+	// Offset 0 and host-to-device are cudaMemcpyToSymbol's defaults, so they are left implicit.
+	cudaMemcpyToSymbol(gpu_III, III, sizeof(III));
+	cudaMemcpyToSymbol(T1, cpu_T1, sizeof(cpu_T1));
+	cudaMemcpyToSymbol(T2, cpu_T2, sizeof(cpu_T2));
+	cudaMemcpyToSymbol(T3, cpu_T3, sizeof(cpu_T3));
+	cudaMemcpyToSymbol(T4, cpu_T4, sizeof(cpu_T4));
+}
+
+/**
+ * Launches m7_tiger192_gpu_hash_120 for `threads` nonces starting at startNounce and then
+ * synchronises through MyStreamSynchronize(NULL, order, thr_id); hashes land in d_outputHash.
+ * The grid size is rounded up, so a thread count that is not a multiple of the block size
+ * still gets its tail hashed (the kernel bounds-checks every index against `threads`).
+ */
+__host__ void m7_tiger192_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order)
+{
+	const int threadsperblock = 640; // inherited note (was German): "aligned with the mixtab size, do not change"
+	dim3 grid((threads + threadsperblock - 1) / threadsperblock); // ceil-div, not truncation
+	dim3 block(threadsperblock);
+	size_t shared_size = 0; // no dynamic shared memory; the kernel's table cache is statically sized
+	m7_tiger192_gpu_hash_120<<<grid, block, shared_size>>>(threads, startNounce, d_outputHash);
+	MyStreamSynchronize(NULL, order, thr_id);
+}
+
+
+/**
+ * Prepares the device for hashing a new 122-byte block header.
+ * Copies the header, followed by a 0x01 byte and zero fill up to 128 bytes, into
+ * c_PaddedMessage80, then runs the Tiger compression of the first 64 header bytes
+ * on the host and stores the resulting three-word state in bufo.
+ * @param pdata  header bytes; 122 bytes are read, the first 64 also as uint64_t words
+ */
+__host__ void tiger192_setBlock_120(void *pdata)
+{
+/* Rebind ROUND and MUL5/7/9 to host variants that read the cpu_T* tables; they stay in effect below this point. */
+#undef ROUND
+#undef MUL5
+#undef MUL7
+#undef MUL9
+#define MUL5(x)   ((x) * SPH_C64(5))
+#define MUL7(x)   ((x) * SPH_C64(7))
+#define MUL9(x)   ((x) * SPH_C64(9))
+#define ROUND(a, b, c, x, mul)    { \
+		c ^= x; \
+		a = SPH_T64(a - (cpu_T1[c & 0xFF] ^ cpu_T2[(c >> 16) & 0xFF] \
+			      ^ cpu_T3[(c >> 32) & 0xFF] ^ cpu_T4[(c >> 48) & 0xFF])); \
+		b = SPH_T64(b + (cpu_T4[(c >> 8) & 0xFF] ^ cpu_T3[(c >> 24) & 0xFF] \
+			^ cpu_T2[(c >> 40) & 0xFF] ^ cpu_T1[(c >> 56) & 0xFF])); \
+		b = mul(b); \
+	}
+	unsigned char PaddedMessage[128];
+	memcpy(PaddedMessage, pdata, 122); // header bytes 0..121
+	PaddedMessage[122] = 0x01;         // single 0x01 byte right after the header
+	memset(PaddedMessage + 123, 0, 5); // zero the remaining bytes 123..127
+	cudaMemcpyToSymbol(c_PaddedMessage80, PaddedMessage, 16 * sizeof(uint64_t), 0, cudaMemcpyHostToDevice);
+	uint64_t in[8], buf[3];
+	const uint64_t *words = (const uint64_t*)pdata; // first 64 header bytes as eight words
+	for (int w = 0; w < 8; ++w) { in[w] = words[w]; }
+	for (int s = 0; s < 3; ++s) { buf[s] = III[s]; } // start from III
+	TIGER_ROUND_BODY(in, buf)
+	cudaMemcpyToSymbol(bufo, buf, 3 * sizeof(uint64_t), 0, cudaMemcpyHostToDevice);
+}
\ No newline at end of file
diff --git a/x13/cuda_whirlpool512.cu b/x13/cuda_whirlpool512.cu
new file mode 100644
index 0000000000..1a3d87c4f1
--- /dev/null
+++ b/x13/cuda_whirlpool512.cu
@@ -0,0 +1,2907 @@
+/*
+ * Built on cbuchner1's implementation, actual hashing code
+ * based on sphlib 3.0
+ *
+ */
+/*
+ * Whirlpool kernel implementation.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2014  djm34
+ *                     
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author   djm34
+ */
+#include <cuda.h>
+#include "cuda_runtime.h"
+#include "device_launch_parameters.h"
+
+
+#include <stdio.h>
+#include <stdint.h>
+#include <memory.h>
+
+
+extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
+
+#define USE_SHARED 1 /* NOTE(review): not referenced in the visible part of this file; presumably selects a shared-memory table path - confirm */
+/* Literal helpers: SPH_C64 / SPH_C32 build typed 64- / 32-bit constants, SPH_T32 keeps only the low 32 bits. */
+#define SPH_C64(x)    ((uint64_t)(x ## ULL))
+#define SPH_C32(x)    ((uint32_t)(x ## U))
+#define SPH_T32(x)    ((x) & SPH_C32(0xFFFFFFFF))
+
+#include "cuda_helper.h"
+
+// from heavy.cu
+
+
+ __constant__ uint64_t c_PaddedMessage80[16]; // padded message (80 bytes + padding)
+ __constant__ uint32_t pTarget[8]; // NOTE(review): presumably the 256-bit target as eight 32-bit words - confirm against the code that fills it
+ __constant__ uint64_t stateo[8]; // NOTE(review): presumably a host-precomputed intermediate state - confirm
+uint32_t *d_wnounce[8]; // host-side array of eight pointers; NOTE(review): presumably one device buffer per thr_id - confirm
+uint32_t *d_WNonce[8]; // host-side array of eight pointers; NOTE(review): purpose not visible here - confirm
+
+/* T0..T7: eight 256-entry tables of 64-bit words in constant memory, private to this translation unit (static). */
+static __constant__ uint64_t T0[256];
+static __constant__ uint64_t T1[256];
+static __constant__ uint64_t T2[256];
+static __constant__ uint64_t T3[256];
+static __constant__ uint64_t T4[256];
+static __constant__ uint64_t T5[256];
+static __constant__ uint64_t T6[256];
+static __constant__ uint64_t T7[256];
+static const uint64_t old1_T0[256] = { /* host-side table of 256 64-bit constants; NOTE(review): the old1_ prefix suggests a legacy copy of the Whirlpool T0 table - its use is not visible here */
+	SPH_C64(0x78D8C07818281818), SPH_C64(0xAF2605AF23652323),
+	SPH_C64(0xF9B87EF9C657C6C6), SPH_C64(0x6FFB136FE825E8E8),
+	SPH_C64(0xA1CB4CA187948787), SPH_C64(0x6211A962B8D5B8B8),
+	SPH_C64(0x0509080501030101), SPH_C64(0x6E0D426E4FD14F4F),
+	SPH_C64(0xEE9BADEE365A3636), SPH_C64(0x04FF5904A6F7A6A6),
+	SPH_C64(0xBD0CDEBDD26BD2D2), SPH_C64(0x060EFB06F502F5F5),
+	SPH_C64(0x8096EF80798B7979), SPH_C64(0xCE305FCE6FB16F6F),
+	SPH_C64(0xEF6DFCEF91AE9191), SPH_C64(0x07F8AA0752F65252),
+	SPH_C64(0xFD4727FD60A06060), SPH_C64(0x76358976BCD9BCBC),
+	SPH_C64(0xCD37ACCD9BB09B9B), SPH_C64(0x8C8A048C8E8F8E8E),
+	SPH_C64(0x15D27115A3F8A3A3), SPH_C64(0x3C6C603C0C140C0C),
+	SPH_C64(0x8A84FF8A7B8D7B7B), SPH_C64(0xE180B5E1355F3535),
+	SPH_C64(0x69F5E8691D271D1D), SPH_C64(0x47B35347E03DE0E0),
+	SPH_C64(0xAC21F6ACD764D7D7), SPH_C64(0xED9C5EEDC25BC2C2),
+	SPH_C64(0x96436D962E722E2E), SPH_C64(0x7A29627A4BDD4B4B),
+	SPH_C64(0x215DA321FE1FFEFE), SPH_C64(0x16D5821657F95757),
+	SPH_C64(0x41BDA841153F1515), SPH_C64(0xB6E89FB677997777),
+	SPH_C64(0xEB92A5EB37593737), SPH_C64(0x569E7B56E532E5E5),
+	SPH_C64(0xD9138CD99FBC9F9F), SPH_C64(0x1723D317F00DF0F0),
+	SPH_C64(0x7F206A7F4ADE4A4A), SPH_C64(0x95449E95DA73DADA),
+	SPH_C64(0x25A2FA2558E85858), SPH_C64(0xCACF06CAC946C9C9),
+	SPH_C64(0x8D7C558D297B2929), SPH_C64(0x225A50220A1E0A0A),
+	SPH_C64(0x4F50E14FB1CEB1B1), SPH_C64(0x1AC9691AA0FDA0A0),
+	SPH_C64(0xDA147FDA6BBD6B6B), SPH_C64(0xABD95CAB85928585),
+	SPH_C64(0x733C8173BDDABDBD), SPH_C64(0x348FD2345DE75D5D),
+	SPH_C64(0x5090805010301010), SPH_C64(0x0307F303F401F4F4),
+	SPH_C64(0xC0DD16C0CB40CBCB), SPH_C64(0xC6D3EDC63E423E3E),
+	SPH_C64(0x112D2811050F0505), SPH_C64(0xE6781FE667A96767),
+	SPH_C64(0x53977353E431E4E4), SPH_C64(0xBB0225BB27692727),
+	SPH_C64(0x5873325841C34141), SPH_C64(0x9DA72C9D8B808B8B),
+	SPH_C64(0x01F65101A7F4A7A7), SPH_C64(0x94B2CF947D877D7D),
+	SPH_C64(0xFB49DCFB95A29595), SPH_C64(0x9F568E9FD875D8D8),
+	SPH_C64(0x30708B30FB10FBFB), SPH_C64(0x71CD2371EE2FEEEE),
+	SPH_C64(0x91BBC7917C847C7C), SPH_C64(0xE37117E366AA6666),
+	SPH_C64(0x8E7BA68EDD7ADDDD), SPH_C64(0x4BAFB84B17391717),
+	SPH_C64(0x4645024647C94747), SPH_C64(0xDC1A84DC9EBF9E9E),
+	SPH_C64(0xC5D41EC5CA43CACA), SPH_C64(0x995875992D772D2D),
+	SPH_C64(0x792E9179BFDCBFBF), SPH_C64(0x1B3F381B07090707),
+	SPH_C64(0x23AC0123ADEAADAD), SPH_C64(0x2FB0EA2F5AEE5A5A),
+	SPH_C64(0xB5EF6CB583988383), SPH_C64(0xFFB685FF33553333),
+	SPH_C64(0xF25C3FF263A56363), SPH_C64(0x0A12100A02060202),
+	SPH_C64(0x38933938AAE3AAAA), SPH_C64(0xA8DEAFA871937171),
+	SPH_C64(0xCFC60ECFC845C8C8), SPH_C64(0x7DD1C87D192B1919),
+	SPH_C64(0x703B727049DB4949), SPH_C64(0x9A5F869AD976D9D9),
+	SPH_C64(0x1D31C31DF20BF2F2), SPH_C64(0x48A84B48E338E3E3),
+	SPH_C64(0x2AB9E22A5BED5B5B), SPH_C64(0x92BC349288858888),
+	SPH_C64(0xC83EA4C89AB39A9A), SPH_C64(0xBE0B2DBE266A2626),
+	SPH_C64(0xFABF8DFA32563232), SPH_C64(0x4A59E94AB0CDB0B0),
+	SPH_C64(0x6AF21B6AE926E9E9), SPH_C64(0x337778330F110F0F),
+	SPH_C64(0xA633E6A6D562D5D5), SPH_C64(0xBAF474BA809D8080),
+	SPH_C64(0x7C27997CBEDFBEBE), SPH_C64(0xDEEB26DECD4ACDCD),
+	SPH_C64(0xE489BDE4345C3434), SPH_C64(0x75327A7548D84848),
+	SPH_C64(0x2454AB24FF1CFFFF), SPH_C64(0x8F8DF78F7A8E7A7A),
+	SPH_C64(0xEA64F4EA90AD9090), SPH_C64(0x3E9DC23E5FE15F5F),
+	SPH_C64(0xA03D1DA020602020), SPH_C64(0xD50F67D568B86868),
+	SPH_C64(0x72CAD0721A2E1A1A), SPH_C64(0x2CB7192CAEEFAEAE),
+	SPH_C64(0x5E7DC95EB4C1B4B4), SPH_C64(0x19CE9A1954FC5454),
+	SPH_C64(0xE57FECE593A89393), SPH_C64(0xAA2F0DAA22662222),
+	SPH_C64(0xE96307E964AC6464), SPH_C64(0x122ADB12F10EF1F1),
+	SPH_C64(0xA2CCBFA273957373), SPH_C64(0x5A82905A12361212),
+	SPH_C64(0x5D7A3A5D40C04040), SPH_C64(0x2848402808180808),
+	SPH_C64(0xE89556E8C358C3C3), SPH_C64(0x7BDF337BEC29ECEC),
+	SPH_C64(0x904D9690DB70DBDB), SPH_C64(0x1FC0611FA1FEA1A1),
+	SPH_C64(0x83911C838D8A8D8D), SPH_C64(0xC9C8F5C93D473D3D),
+	SPH_C64(0xF15BCCF197A49797), SPH_C64(0x0000000000000000),
+	SPH_C64(0xD4F936D4CF4CCFCF), SPH_C64(0x876E45872B7D2B2B),
+	SPH_C64(0xB3E197B3769A7676), SPH_C64(0xB0E664B0829B8282),
+	SPH_C64(0xA928FEA9D667D6D6), SPH_C64(0x77C3D8771B2D1B1B),
+	SPH_C64(0x5B74C15BB5C2B5B5), SPH_C64(0x29BE1129AFECAFAF),
+	SPH_C64(0xDF1D77DF6ABE6A6A), SPH_C64(0x0DEABA0D50F05050),
+	SPH_C64(0x4C57124C45CF4545), SPH_C64(0x1838CB18F308F3F3),
+	SPH_C64(0xF0AD9DF030503030), SPH_C64(0x74C42B74EF2CEFEF),
+	SPH_C64(0xC3DAE5C33F413F3F), SPH_C64(0x1CC7921C55FF5555),
+	SPH_C64(0x10DB7910A2FBA2A2), SPH_C64(0x65E90365EA23EAEA),
+	SPH_C64(0xEC6A0FEC65AF6565), SPH_C64(0x6803B968BAD3BABA),
+	SPH_C64(0x934A65932F712F2F), SPH_C64(0xE78E4EE7C05DC0C0),
+	SPH_C64(0x8160BE81DE7FDEDE), SPH_C64(0x6CFCE06C1C241C1C),
+	SPH_C64(0x2E46BB2EFD1AFDFD), SPH_C64(0x641F52644DD74D4D),
+	SPH_C64(0xE076E4E092AB9292), SPH_C64(0xBCFA8FBC759F7575),
+	SPH_C64(0x1E36301E060A0606), SPH_C64(0x98AE24988A838A8A),
+	SPH_C64(0x404BF940B2CBB2B2), SPH_C64(0x59856359E637E6E6),
+	SPH_C64(0x367E70360E120E0E), SPH_C64(0x63E7F8631F211F1F),
+	SPH_C64(0xF75537F762A66262), SPH_C64(0xA33AEEA3D461D4D4),
+	SPH_C64(0x32812932A8E5A8A8), SPH_C64(0xF452C4F496A79696),
+	SPH_C64(0x3A629B3AF916F9F9), SPH_C64(0xF6A366F6C552C5C5),
+	SPH_C64(0xB11035B1256F2525), SPH_C64(0x20ABF22059EB5959),
+	SPH_C64(0xAED054AE84918484), SPH_C64(0xA7C5B7A772967272),
+	SPH_C64(0xDDECD5DD394B3939), SPH_C64(0x61165A614CD44C4C),
+	SPH_C64(0x3B94CA3B5EE25E5E), SPH_C64(0x859FE78578887878),
+	SPH_C64(0xD8E5DDD838483838), SPH_C64(0x869814868C898C8C),
+	SPH_C64(0xB217C6B2D16ED1D1), SPH_C64(0x0BE4410BA5F2A5A5),
+	SPH_C64(0x4DA1434DE23BE2E2), SPH_C64(0xF84E2FF861A36161),
+	SPH_C64(0x4542F145B3C8B3B3), SPH_C64(0xA53415A521632121),
+	SPH_C64(0xD60894D69CB99C9C), SPH_C64(0x66EEF0661E221E1E),
+	SPH_C64(0x5261225243C54343), SPH_C64(0xFCB176FCC754C7C7),
+	SPH_C64(0x2B4FB32BFC19FCFC), SPH_C64(0x14242014040C0404),
+	SPH_C64(0x08E3B20851F35151), SPH_C64(0xC725BCC799B69999),
+	SPH_C64(0xC4224FC46DB76D6D), SPH_C64(0x396568390D170D0D),
+	SPH_C64(0x35798335FA13FAFA), SPH_C64(0x8469B684DF7CDFDF),
+	SPH_C64(0x9BA9D79B7E827E7E), SPH_C64(0xB4193DB4246C2424),
+	SPH_C64(0xD7FEC5D73B4D3B3B), SPH_C64(0x3D9A313DABE0ABAB),
+	SPH_C64(0xD1F03ED1CE4FCECE), SPH_C64(0x5599885511331111),
+	SPH_C64(0x89830C898F8C8F8F), SPH_C64(0x6B044A6B4ED24E4E),
+	SPH_C64(0x5166D151B7C4B7B7), SPH_C64(0x60E00B60EB20EBEB),
+	SPH_C64(0xCCC1FDCC3C443C3C), SPH_C64(0xBFFD7CBF819E8181),
+	SPH_C64(0xFE40D4FE94A19494), SPH_C64(0x0C1CEB0CF704F7F7),
+	SPH_C64(0x6718A167B9D6B9B9), SPH_C64(0x5F8B985F13351313),
+	SPH_C64(0x9C517D9C2C742C2C), SPH_C64(0xB805D6B8D368D3D3),
+	SPH_C64(0x5C8C6B5CE734E7E7), SPH_C64(0xCB3957CB6EB26E6E),
+	SPH_C64(0xF3AA6EF3C451C4C4), SPH_C64(0x0F1B180F03050303),
+	SPH_C64(0x13DC8A1356FA5656), SPH_C64(0x495E1A4944CC4444),
+	SPH_C64(0x9EA0DF9E7F817F7F), SPH_C64(0x37882137A9E6A9A9),
+	SPH_C64(0x82674D822A7E2A2A), SPH_C64(0x6D0AB16DBBD0BBBB),
+	SPH_C64(0xE28746E2C15EC1C1), SPH_C64(0x02F1A20253F55353),
+	SPH_C64(0x8B72AE8BDC79DCDC), SPH_C64(0x275358270B1D0B0B),
+	SPH_C64(0xD3019CD39DBA9D9D), SPH_C64(0xC12B47C16CB46C6C),
+	SPH_C64(0xF5A495F531533131), SPH_C64(0xB9F387B9749C7474),
+	SPH_C64(0x0915E309F607F6F6), SPH_C64(0x434C0A4346CA4646),
+	SPH_C64(0x26A50926ACE9ACAC), SPH_C64(0x97B53C9789868989),
+	SPH_C64(0x44B4A044143C1414), SPH_C64(0x42BA5B42E13EE1E1),
+	SPH_C64(0x4EA6B04E163A1616), SPH_C64(0xD2F7CDD23A4E3A3A),
+	SPH_C64(0xD0066FD069BB6969), SPH_C64(0x2D41482D091B0909),
+	SPH_C64(0xADD7A7AD70907070), SPH_C64(0x546FD954B6C7B6B6),
+	SPH_C64(0xB71ECEB7D06DD0D0), SPH_C64(0x7ED63B7EED2AEDED),
+	SPH_C64(0xDBE22EDBCC49CCCC), SPH_C64(0x57682A5742C64242),
+	SPH_C64(0xC22CB4C298B59898), SPH_C64(0x0EED490EA4F1A4A4),
+	SPH_C64(0x88755D8828782828), SPH_C64(0x3186DA315CE45C5C),
+	SPH_C64(0x3F6B933FF815F8F8), SPH_C64(0xA4C244A486978686)
+};
+
+
+/* old1_T1: host-side table of 256 64-bit constants; spot checks show each entry equals the matching old1_T0 entry rotated left by 8 bits. */
+static const uint64_t old1_T1[256] = {
+	SPH_C64(0xD8C0781828181878), SPH_C64(0x2605AF23652323AF),
+	SPH_C64(0xB87EF9C657C6C6F9), SPH_C64(0xFB136FE825E8E86F),
+	SPH_C64(0xCB4CA187948787A1), SPH_C64(0x11A962B8D5B8B862),
+	SPH_C64(0x0908050103010105), SPH_C64(0x0D426E4FD14F4F6E),
+	SPH_C64(0x9BADEE365A3636EE), SPH_C64(0xFF5904A6F7A6A604),
+	SPH_C64(0x0CDEBDD26BD2D2BD), SPH_C64(0x0EFB06F502F5F506),
+	SPH_C64(0x96EF80798B797980), SPH_C64(0x305FCE6FB16F6FCE),
+	SPH_C64(0x6DFCEF91AE9191EF), SPH_C64(0xF8AA0752F6525207),
+	SPH_C64(0x4727FD60A06060FD), SPH_C64(0x358976BCD9BCBC76),
+	SPH_C64(0x37ACCD9BB09B9BCD), SPH_C64(0x8A048C8E8F8E8E8C),
+	SPH_C64(0xD27115A3F8A3A315), SPH_C64(0x6C603C0C140C0C3C),
+	SPH_C64(0x84FF8A7B8D7B7B8A), SPH_C64(0x80B5E1355F3535E1),
+	SPH_C64(0xF5E8691D271D1D69), SPH_C64(0xB35347E03DE0E047),
+	SPH_C64(0x21F6ACD764D7D7AC), SPH_C64(0x9C5EEDC25BC2C2ED),
+	SPH_C64(0x436D962E722E2E96), SPH_C64(0x29627A4BDD4B4B7A),
+	SPH_C64(0x5DA321FE1FFEFE21), SPH_C64(0xD5821657F9575716),
+	SPH_C64(0xBDA841153F151541), SPH_C64(0xE89FB677997777B6),
+	SPH_C64(0x92A5EB37593737EB), SPH_C64(0x9E7B56E532E5E556),
+	SPH_C64(0x138CD99FBC9F9FD9), SPH_C64(0x23D317F00DF0F017),
+	SPH_C64(0x206A7F4ADE4A4A7F), SPH_C64(0x449E95DA73DADA95),
+	SPH_C64(0xA2FA2558E8585825), SPH_C64(0xCF06CAC946C9C9CA),
+	SPH_C64(0x7C558D297B29298D), SPH_C64(0x5A50220A1E0A0A22),
+	SPH_C64(0x50E14FB1CEB1B14F), SPH_C64(0xC9691AA0FDA0A01A),
+	SPH_C64(0x147FDA6BBD6B6BDA), SPH_C64(0xD95CAB85928585AB),
+	SPH_C64(0x3C8173BDDABDBD73), SPH_C64(0x8FD2345DE75D5D34),
+	SPH_C64(0x9080501030101050), SPH_C64(0x07F303F401F4F403),
+	SPH_C64(0xDD16C0CB40CBCBC0), SPH_C64(0xD3EDC63E423E3EC6),
+	SPH_C64(0x2D2811050F050511), SPH_C64(0x781FE667A96767E6),
+	SPH_C64(0x977353E431E4E453), SPH_C64(0x0225BB27692727BB),
+	SPH_C64(0x73325841C3414158), SPH_C64(0xA72C9D8B808B8B9D),
+	SPH_C64(0xF65101A7F4A7A701), SPH_C64(0xB2CF947D877D7D94),
+	SPH_C64(0x49DCFB95A29595FB), SPH_C64(0x568E9FD875D8D89F),
+	SPH_C64(0x708B30FB10FBFB30), SPH_C64(0xCD2371EE2FEEEE71),
+	SPH_C64(0xBBC7917C847C7C91), SPH_C64(0x7117E366AA6666E3),
+	SPH_C64(0x7BA68EDD7ADDDD8E), SPH_C64(0xAFB84B173917174B),
+	SPH_C64(0x45024647C9474746), SPH_C64(0x1A84DC9EBF9E9EDC),
+	SPH_C64(0xD41EC5CA43CACAC5), SPH_C64(0x5875992D772D2D99),
+	SPH_C64(0x2E9179BFDCBFBF79), SPH_C64(0x3F381B070907071B),
+	SPH_C64(0xAC0123ADEAADAD23), SPH_C64(0xB0EA2F5AEE5A5A2F),
+	SPH_C64(0xEF6CB583988383B5), SPH_C64(0xB685FF33553333FF),
+	SPH_C64(0x5C3FF263A56363F2), SPH_C64(0x12100A020602020A),
+	SPH_C64(0x933938AAE3AAAA38), SPH_C64(0xDEAFA871937171A8),
+	SPH_C64(0xC60ECFC845C8C8CF), SPH_C64(0xD1C87D192B19197D),
+	SPH_C64(0x3B727049DB494970), SPH_C64(0x5F869AD976D9D99A),
+	SPH_C64(0x31C31DF20BF2F21D), SPH_C64(0xA84B48E338E3E348),
+	SPH_C64(0xB9E22A5BED5B5B2A), SPH_C64(0xBC34928885888892),
+	SPH_C64(0x3EA4C89AB39A9AC8), SPH_C64(0x0B2DBE266A2626BE),
+	SPH_C64(0xBF8DFA32563232FA), SPH_C64(0x59E94AB0CDB0B04A),
+	SPH_C64(0xF21B6AE926E9E96A), SPH_C64(0x7778330F110F0F33),
+	SPH_C64(0x33E6A6D562D5D5A6), SPH_C64(0xF474BA809D8080BA),
+	SPH_C64(0x27997CBEDFBEBE7C), SPH_C64(0xEB26DECD4ACDCDDE),
+	SPH_C64(0x89BDE4345C3434E4), SPH_C64(0x327A7548D8484875),
+	SPH_C64(0x54AB24FF1CFFFF24), SPH_C64(0x8DF78F7A8E7A7A8F),
+	SPH_C64(0x64F4EA90AD9090EA), SPH_C64(0x9DC23E5FE15F5F3E),
+	SPH_C64(0x3D1DA020602020A0), SPH_C64(0x0F67D568B86868D5),
+	SPH_C64(0xCAD0721A2E1A1A72), SPH_C64(0xB7192CAEEFAEAE2C),
+	SPH_C64(0x7DC95EB4C1B4B45E), SPH_C64(0xCE9A1954FC545419),
+	SPH_C64(0x7FECE593A89393E5), SPH_C64(0x2F0DAA22662222AA),
+	SPH_C64(0x6307E964AC6464E9), SPH_C64(0x2ADB12F10EF1F112),
+	SPH_C64(0xCCBFA273957373A2), SPH_C64(0x82905A123612125A),
+	SPH_C64(0x7A3A5D40C040405D), SPH_C64(0x4840280818080828),
+	SPH_C64(0x9556E8C358C3C3E8), SPH_C64(0xDF337BEC29ECEC7B),
+	SPH_C64(0x4D9690DB70DBDB90), SPH_C64(0xC0611FA1FEA1A11F),
+	SPH_C64(0x911C838D8A8D8D83), SPH_C64(0xC8F5C93D473D3DC9),
+	SPH_C64(0x5BCCF197A49797F1), SPH_C64(0x0000000000000000),
+	SPH_C64(0xF936D4CF4CCFCFD4), SPH_C64(0x6E45872B7D2B2B87),
+	SPH_C64(0xE197B3769A7676B3), SPH_C64(0xE664B0829B8282B0),
+	SPH_C64(0x28FEA9D667D6D6A9), SPH_C64(0xC3D8771B2D1B1B77),
+	SPH_C64(0x74C15BB5C2B5B55B), SPH_C64(0xBE1129AFECAFAF29),
+	SPH_C64(0x1D77DF6ABE6A6ADF), SPH_C64(0xEABA0D50F050500D),
+	SPH_C64(0x57124C45CF45454C), SPH_C64(0x38CB18F308F3F318),
+	SPH_C64(0xAD9DF030503030F0), SPH_C64(0xC42B74EF2CEFEF74),
+	SPH_C64(0xDAE5C33F413F3FC3), SPH_C64(0xC7921C55FF55551C),
+	SPH_C64(0xDB7910A2FBA2A210), SPH_C64(0xE90365EA23EAEA65),
+	SPH_C64(0x6A0FEC65AF6565EC), SPH_C64(0x03B968BAD3BABA68),
+	SPH_C64(0x4A65932F712F2F93), SPH_C64(0x8E4EE7C05DC0C0E7),
+	SPH_C64(0x60BE81DE7FDEDE81), SPH_C64(0xFCE06C1C241C1C6C),
+	SPH_C64(0x46BB2EFD1AFDFD2E), SPH_C64(0x1F52644DD74D4D64),
+	SPH_C64(0x76E4E092AB9292E0), SPH_C64(0xFA8FBC759F7575BC),
+	SPH_C64(0x36301E060A06061E), SPH_C64(0xAE24988A838A8A98),
+	SPH_C64(0x4BF940B2CBB2B240), SPH_C64(0x856359E637E6E659),
+	SPH_C64(0x7E70360E120E0E36), SPH_C64(0xE7F8631F211F1F63),
+	SPH_C64(0x5537F762A66262F7), SPH_C64(0x3AEEA3D461D4D4A3),
+	SPH_C64(0x812932A8E5A8A832), SPH_C64(0x52C4F496A79696F4),
+	SPH_C64(0x629B3AF916F9F93A), SPH_C64(0xA366F6C552C5C5F6),
+	SPH_C64(0x1035B1256F2525B1), SPH_C64(0xABF22059EB595920),
+	SPH_C64(0xD054AE84918484AE), SPH_C64(0xC5B7A772967272A7),
+	SPH_C64(0xECD5DD394B3939DD), SPH_C64(0x165A614CD44C4C61),
+	SPH_C64(0x94CA3B5EE25E5E3B), SPH_C64(0x9FE7857888787885),
+	SPH_C64(0xE5DDD838483838D8), SPH_C64(0x9814868C898C8C86),
+	SPH_C64(0x17C6B2D16ED1D1B2), SPH_C64(0xE4410BA5F2A5A50B),
+	SPH_C64(0xA1434DE23BE2E24D), SPH_C64(0x4E2FF861A36161F8),
+	SPH_C64(0x42F145B3C8B3B345), SPH_C64(0x3415A521632121A5),
+	SPH_C64(0x0894D69CB99C9CD6), SPH_C64(0xEEF0661E221E1E66),
+	SPH_C64(0x61225243C5434352), SPH_C64(0xB176FCC754C7C7FC),
+	SPH_C64(0x4FB32BFC19FCFC2B), SPH_C64(0x242014040C040414),
+	SPH_C64(0xE3B20851F3515108), SPH_C64(0x25BCC799B69999C7),
+	SPH_C64(0x224FC46DB76D6DC4), SPH_C64(0x6568390D170D0D39),
+	SPH_C64(0x798335FA13FAFA35), SPH_C64(0x69B684DF7CDFDF84),
+	SPH_C64(0xA9D79B7E827E7E9B), SPH_C64(0x193DB4246C2424B4),
+	SPH_C64(0xFEC5D73B4D3B3BD7), SPH_C64(0x9A313DABE0ABAB3D),
+	SPH_C64(0xF03ED1CE4FCECED1), SPH_C64(0x9988551133111155),
+	SPH_C64(0x830C898F8C8F8F89), SPH_C64(0x044A6B4ED24E4E6B),
+	SPH_C64(0x66D151B7C4B7B751), SPH_C64(0xE00B60EB20EBEB60),
+	SPH_C64(0xC1FDCC3C443C3CCC), SPH_C64(0xFD7CBF819E8181BF),
+	SPH_C64(0x40D4FE94A19494FE), SPH_C64(0x1CEB0CF704F7F70C),
+	SPH_C64(0x18A167B9D6B9B967), SPH_C64(0x8B985F133513135F),
+	SPH_C64(0x517D9C2C742C2C9C), SPH_C64(0x05D6B8D368D3D3B8),
+	SPH_C64(0x8C6B5CE734E7E75C), SPH_C64(0x3957CB6EB26E6ECB),
+	SPH_C64(0xAA6EF3C451C4C4F3), SPH_C64(0x1B180F030503030F),
+	SPH_C64(0xDC8A1356FA565613), SPH_C64(0x5E1A4944CC444449),
+	SPH_C64(0xA0DF9E7F817F7F9E), SPH_C64(0x882137A9E6A9A937),
+	SPH_C64(0x674D822A7E2A2A82), SPH_C64(0x0AB16DBBD0BBBB6D),
+	SPH_C64(0x8746E2C15EC1C1E2), SPH_C64(0xF1A20253F5535302),
+	SPH_C64(0x72AE8BDC79DCDC8B), SPH_C64(0x5358270B1D0B0B27),
+	SPH_C64(0x019CD39DBA9D9DD3), SPH_C64(0x2B47C16CB46C6CC1),
+	SPH_C64(0xA495F531533131F5), SPH_C64(0xF387B9749C7474B9),
+	SPH_C64(0x15E309F607F6F609), SPH_C64(0x4C0A4346CA464643),
+	SPH_C64(0xA50926ACE9ACAC26), SPH_C64(0xB53C978986898997),
+	SPH_C64(0xB4A044143C141444), SPH_C64(0xBA5B42E13EE1E142),
+	SPH_C64(0xA6B04E163A16164E), SPH_C64(0xF7CDD23A4E3A3AD2),
+	SPH_C64(0x066FD069BB6969D0), SPH_C64(0x41482D091B09092D),
+	SPH_C64(0xD7A7AD70907070AD), SPH_C64(0x6FD954B6C7B6B654),
+	SPH_C64(0x1ECEB7D06DD0D0B7), SPH_C64(0xD63B7EED2AEDED7E),
+	SPH_C64(0xE22EDBCC49CCCCDB), SPH_C64(0x682A5742C6424257),
+	SPH_C64(0x2CB4C298B59898C2), SPH_C64(0xED490EA4F1A4A40E),
+	SPH_C64(0x755D882878282888), SPH_C64(0x86DA315CE45C5C31),
+	SPH_C64(0x6B933FF815F8F83F), SPH_C64(0xC244A486978686A4)
+};
+/* old1_T2: host-side table of 256 64-bit constants; spot checks show each entry equals the matching old1_T0 entry rotated left by 16 bits. */
+static const uint64_t old1_T2[256] = {
+	SPH_C64(0xC0781828181878D8), SPH_C64(0x05AF23652323AF26),
+	SPH_C64(0x7EF9C657C6C6F9B8), SPH_C64(0x136FE825E8E86FFB),
+	SPH_C64(0x4CA187948787A1CB), SPH_C64(0xA962B8D5B8B86211),
+	SPH_C64(0x0805010301010509), SPH_C64(0x426E4FD14F4F6E0D),
+	SPH_C64(0xADEE365A3636EE9B), SPH_C64(0x5904A6F7A6A604FF),
+	SPH_C64(0xDEBDD26BD2D2BD0C), SPH_C64(0xFB06F502F5F5060E),
+	SPH_C64(0xEF80798B79798096), SPH_C64(0x5FCE6FB16F6FCE30),
+	SPH_C64(0xFCEF91AE9191EF6D), SPH_C64(0xAA0752F6525207F8),
+	SPH_C64(0x27FD60A06060FD47), SPH_C64(0x8976BCD9BCBC7635),
+	SPH_C64(0xACCD9BB09B9BCD37), SPH_C64(0x048C8E8F8E8E8C8A),
+	SPH_C64(0x7115A3F8A3A315D2), SPH_C64(0x603C0C140C0C3C6C),
+	SPH_C64(0xFF8A7B8D7B7B8A84), SPH_C64(0xB5E1355F3535E180),
+	SPH_C64(0xE8691D271D1D69F5), SPH_C64(0x5347E03DE0E047B3),
+	SPH_C64(0xF6ACD764D7D7AC21), SPH_C64(0x5EEDC25BC2C2ED9C),
+	SPH_C64(0x6D962E722E2E9643), SPH_C64(0x627A4BDD4B4B7A29),
+	SPH_C64(0xA321FE1FFEFE215D), SPH_C64(0x821657F9575716D5),
+	SPH_C64(0xA841153F151541BD), SPH_C64(0x9FB677997777B6E8),
+	SPH_C64(0xA5EB37593737EB92), SPH_C64(0x7B56E532E5E5569E),
+	SPH_C64(0x8CD99FBC9F9FD913), SPH_C64(0xD317F00DF0F01723),
+	SPH_C64(0x6A7F4ADE4A4A7F20), SPH_C64(0x9E95DA73DADA9544),
+	SPH_C64(0xFA2558E8585825A2), SPH_C64(0x06CAC946C9C9CACF),
+	SPH_C64(0x558D297B29298D7C), SPH_C64(0x50220A1E0A0A225A),
+	SPH_C64(0xE14FB1CEB1B14F50), SPH_C64(0x691AA0FDA0A01AC9),
+	SPH_C64(0x7FDA6BBD6B6BDA14), SPH_C64(0x5CAB85928585ABD9),
+	SPH_C64(0x8173BDDABDBD733C), SPH_C64(0xD2345DE75D5D348F),
+	SPH_C64(0x8050103010105090), SPH_C64(0xF303F401F4F40307),
+	SPH_C64(0x16C0CB40CBCBC0DD), SPH_C64(0xEDC63E423E3EC6D3),
+	SPH_C64(0x2811050F0505112D), SPH_C64(0x1FE667A96767E678),
+	SPH_C64(0x7353E431E4E45397), SPH_C64(0x25BB27692727BB02),
+	SPH_C64(0x325841C341415873), SPH_C64(0x2C9D8B808B8B9DA7),
+	SPH_C64(0x5101A7F4A7A701F6), SPH_C64(0xCF947D877D7D94B2),
+	SPH_C64(0xDCFB95A29595FB49), SPH_C64(0x8E9FD875D8D89F56),
+	SPH_C64(0x8B30FB10FBFB3070), SPH_C64(0x2371EE2FEEEE71CD),
+	SPH_C64(0xC7917C847C7C91BB), SPH_C64(0x17E366AA6666E371),
+	SPH_C64(0xA68EDD7ADDDD8E7B), SPH_C64(0xB84B173917174BAF),
+	SPH_C64(0x024647C947474645), SPH_C64(0x84DC9EBF9E9EDC1A),
+	SPH_C64(0x1EC5CA43CACAC5D4), SPH_C64(0x75992D772D2D9958),
+	SPH_C64(0x9179BFDCBFBF792E), SPH_C64(0x381B070907071B3F),
+	SPH_C64(0x0123ADEAADAD23AC), SPH_C64(0xEA2F5AEE5A5A2FB0),
+	SPH_C64(0x6CB583988383B5EF), SPH_C64(0x85FF33553333FFB6),
+	SPH_C64(0x3FF263A56363F25C), SPH_C64(0x100A020602020A12),
+	SPH_C64(0x3938AAE3AAAA3893), SPH_C64(0xAFA871937171A8DE),
+	SPH_C64(0x0ECFC845C8C8CFC6), SPH_C64(0xC87D192B19197DD1),
+	SPH_C64(0x727049DB4949703B), SPH_C64(0x869AD976D9D99A5F),
+	SPH_C64(0xC31DF20BF2F21D31), SPH_C64(0x4B48E338E3E348A8),
+	SPH_C64(0xE22A5BED5B5B2AB9), SPH_C64(0x34928885888892BC),
+	SPH_C64(0xA4C89AB39A9AC83E), SPH_C64(0x2DBE266A2626BE0B),
+	SPH_C64(0x8DFA32563232FABF), SPH_C64(0xE94AB0CDB0B04A59),
+	SPH_C64(0x1B6AE926E9E96AF2), SPH_C64(0x78330F110F0F3377),
+	SPH_C64(0xE6A6D562D5D5A633), SPH_C64(0x74BA809D8080BAF4),
+	SPH_C64(0x997CBEDFBEBE7C27), SPH_C64(0x26DECD4ACDCDDEEB),
+	SPH_C64(0xBDE4345C3434E489), SPH_C64(0x7A7548D848487532),
+	SPH_C64(0xAB24FF1CFFFF2454), SPH_C64(0xF78F7A8E7A7A8F8D),
+	SPH_C64(0xF4EA90AD9090EA64), SPH_C64(0xC23E5FE15F5F3E9D),
+	SPH_C64(0x1DA020602020A03D), SPH_C64(0x67D568B86868D50F),
+	SPH_C64(0xD0721A2E1A1A72CA), SPH_C64(0x192CAEEFAEAE2CB7),
+	SPH_C64(0xC95EB4C1B4B45E7D), SPH_C64(0x9A1954FC545419CE),
+	SPH_C64(0xECE593A89393E57F), SPH_C64(0x0DAA22662222AA2F),
+	SPH_C64(0x07E964AC6464E963), SPH_C64(0xDB12F10EF1F1122A),
+	SPH_C64(0xBFA273957373A2CC), SPH_C64(0x905A123612125A82),
+	SPH_C64(0x3A5D40C040405D7A), SPH_C64(0x4028081808082848),
+	SPH_C64(0x56E8C358C3C3E895), SPH_C64(0x337BEC29ECEC7BDF),
+	SPH_C64(0x9690DB70DBDB904D), SPH_C64(0x611FA1FEA1A11FC0),
+	SPH_C64(0x1C838D8A8D8D8391), SPH_C64(0xF5C93D473D3DC9C8),
+	SPH_C64(0xCCF197A49797F15B), SPH_C64(0x0000000000000000),
+	SPH_C64(0x36D4CF4CCFCFD4F9), SPH_C64(0x45872B7D2B2B876E),
+	SPH_C64(0x97B3769A7676B3E1), SPH_C64(0x64B0829B8282B0E6),
+	SPH_C64(0xFEA9D667D6D6A928), SPH_C64(0xD8771B2D1B1B77C3),
+	SPH_C64(0xC15BB5C2B5B55B74), SPH_C64(0x1129AFECAFAF29BE),
+	SPH_C64(0x77DF6ABE6A6ADF1D), SPH_C64(0xBA0D50F050500DEA),
+	SPH_C64(0x124C45CF45454C57), SPH_C64(0xCB18F308F3F31838),
+	SPH_C64(0x9DF030503030F0AD), SPH_C64(0x2B74EF2CEFEF74C4),
+	SPH_C64(0xE5C33F413F3FC3DA), SPH_C64(0x921C55FF55551CC7),
+	SPH_C64(0x7910A2FBA2A210DB), SPH_C64(0x0365EA23EAEA65E9),
+	SPH_C64(0x0FEC65AF6565EC6A), SPH_C64(0xB968BAD3BABA6803),
+	SPH_C64(0x65932F712F2F934A), SPH_C64(0x4EE7C05DC0C0E78E),
+	SPH_C64(0xBE81DE7FDEDE8160), SPH_C64(0xE06C1C241C1C6CFC),
+	SPH_C64(0xBB2EFD1AFDFD2E46), SPH_C64(0x52644DD74D4D641F),
+	SPH_C64(0xE4E092AB9292E076), SPH_C64(0x8FBC759F7575BCFA),
+	SPH_C64(0x301E060A06061E36), SPH_C64(0x24988A838A8A98AE),
+	SPH_C64(0xF940B2CBB2B2404B), SPH_C64(0x6359E637E6E65985),
+	SPH_C64(0x70360E120E0E367E), SPH_C64(0xF8631F211F1F63E7),
+	SPH_C64(0x37F762A66262F755), SPH_C64(0xEEA3D461D4D4A33A),
+	SPH_C64(0x2932A8E5A8A83281), SPH_C64(0xC4F496A79696F452),
+	SPH_C64(0x9B3AF916F9F93A62), SPH_C64(0x66F6C552C5C5F6A3),
+	SPH_C64(0x35B1256F2525B110), SPH_C64(0xF22059EB595920AB),
+	SPH_C64(0x54AE84918484AED0), SPH_C64(0xB7A772967272A7C5),
+	SPH_C64(0xD5DD394B3939DDEC), SPH_C64(0x5A614CD44C4C6116),
+	SPH_C64(0xCA3B5EE25E5E3B94), SPH_C64(0xE78578887878859F),
+	SPH_C64(0xDDD838483838D8E5), SPH_C64(0x14868C898C8C8698),
+	SPH_C64(0xC6B2D16ED1D1B217), SPH_C64(0x410BA5F2A5A50BE4),
+	SPH_C64(0x434DE23BE2E24DA1), SPH_C64(0x2FF861A36161F84E),
+	SPH_C64(0xF145B3C8B3B34542), SPH_C64(0x15A521632121A534),
+	SPH_C64(0x94D69CB99C9CD608), SPH_C64(0xF0661E221E1E66EE),
+	SPH_C64(0x225243C543435261), SPH_C64(0x76FCC754C7C7FCB1),
+	SPH_C64(0xB32BFC19FCFC2B4F), SPH_C64(0x2014040C04041424),
+	SPH_C64(0xB20851F3515108E3), SPH_C64(0xBCC799B69999C725),
+	SPH_C64(0x4FC46DB76D6DC422), SPH_C64(0x68390D170D0D3965),
+	SPH_C64(0x8335FA13FAFA3579), SPH_C64(0xB684DF7CDFDF8469),
+	SPH_C64(0xD79B7E827E7E9BA9), SPH_C64(0x3DB4246C2424B419),
+	SPH_C64(0xC5D73B4D3B3BD7FE), SPH_C64(0x313DABE0ABAB3D9A),
+	SPH_C64(0x3ED1CE4FCECED1F0), SPH_C64(0x8855113311115599),
+	SPH_C64(0x0C898F8C8F8F8983), SPH_C64(0x4A6B4ED24E4E6B04),
+	SPH_C64(0xD151B7C4B7B75166), SPH_C64(0x0B60EB20EBEB60E0),
+	SPH_C64(0xFDCC3C443C3CCCC1), SPH_C64(0x7CBF819E8181BFFD),
+	SPH_C64(0xD4FE94A19494FE40), SPH_C64(0xEB0CF704F7F70C1C),
+	SPH_C64(0xA167B9D6B9B96718), SPH_C64(0x985F133513135F8B),
+	SPH_C64(0x7D9C2C742C2C9C51), SPH_C64(0xD6B8D368D3D3B805),
+	SPH_C64(0x6B5CE734E7E75C8C), SPH_C64(0x57CB6EB26E6ECB39),
+	SPH_C64(0x6EF3C451C4C4F3AA), SPH_C64(0x180F030503030F1B),
+	SPH_C64(0x8A1356FA565613DC), SPH_C64(0x1A4944CC4444495E),
+	SPH_C64(0xDF9E7F817F7F9EA0), SPH_C64(0x2137A9E6A9A93788),
+	SPH_C64(0x4D822A7E2A2A8267), SPH_C64(0xB16DBBD0BBBB6D0A),
+	SPH_C64(0x46E2C15EC1C1E287), SPH_C64(0xA20253F5535302F1),
+	SPH_C64(0xAE8BDC79DCDC8B72), SPH_C64(0x58270B1D0B0B2753),
+	SPH_C64(0x9CD39DBA9D9DD301), SPH_C64(0x47C16CB46C6CC12B),
+	SPH_C64(0x95F531533131F5A4), SPH_C64(0x87B9749C7474B9F3),
+	SPH_C64(0xE309F607F6F60915), SPH_C64(0x0A4346CA4646434C),
+	SPH_C64(0x0926ACE9ACAC26A5), SPH_C64(0x3C978986898997B5),
+	SPH_C64(0xA044143C141444B4), SPH_C64(0x5B42E13EE1E142BA),
+	SPH_C64(0xB04E163A16164EA6), SPH_C64(0xCDD23A4E3A3AD2F7),
+	SPH_C64(0x6FD069BB6969D006), SPH_C64(0x482D091B09092D41),
+	SPH_C64(0xA7AD70907070ADD7), SPH_C64(0xD954B6C7B6B6546F),
+	SPH_C64(0xCEB7D06DD0D0B71E), SPH_C64(0x3B7EED2AEDED7ED6),
+	SPH_C64(0x2EDBCC49CCCCDBE2), SPH_C64(0x2A5742C642425768),
+	SPH_C64(0xB4C298B59898C22C), SPH_C64(0x490EA4F1A4A40EED),
+	SPH_C64(0x5D88287828288875), SPH_C64(0xDA315CE45C5C3186),
+	SPH_C64(0x933FF815F8F83F6B), SPH_C64(0x44A486978686A4C2)
+};
+
+static const uint64_t old1_T3[256] = {
+	SPH_C64(0x781828181878D8C0), SPH_C64(0xAF23652323AF2605),
+	SPH_C64(0xF9C657C6C6F9B87E), SPH_C64(0x6FE825E8E86FFB13),
+	SPH_C64(0xA187948787A1CB4C), SPH_C64(0x62B8D5B8B86211A9),
+	SPH_C64(0x0501030101050908), SPH_C64(0x6E4FD14F4F6E0D42),
+	SPH_C64(0xEE365A3636EE9BAD), SPH_C64(0x04A6F7A6A604FF59),
+	SPH_C64(0xBDD26BD2D2BD0CDE), SPH_C64(0x06F502F5F5060EFB),
+	SPH_C64(0x80798B79798096EF), SPH_C64(0xCE6FB16F6FCE305F),
+	SPH_C64(0xEF91AE9191EF6DFC), SPH_C64(0x0752F6525207F8AA),
+	SPH_C64(0xFD60A06060FD4727), SPH_C64(0x76BCD9BCBC763589),
+	SPH_C64(0xCD9BB09B9BCD37AC), SPH_C64(0x8C8E8F8E8E8C8A04),
+	SPH_C64(0x15A3F8A3A315D271), SPH_C64(0x3C0C140C0C3C6C60),
+	SPH_C64(0x8A7B8D7B7B8A84FF), SPH_C64(0xE1355F3535E180B5),
+	SPH_C64(0x691D271D1D69F5E8), SPH_C64(0x47E03DE0E047B353),
+	SPH_C64(0xACD764D7D7AC21F6), SPH_C64(0xEDC25BC2C2ED9C5E),
+	SPH_C64(0x962E722E2E96436D), SPH_C64(0x7A4BDD4B4B7A2962),
+	SPH_C64(0x21FE1FFEFE215DA3), SPH_C64(0x1657F9575716D582),
+	SPH_C64(0x41153F151541BDA8), SPH_C64(0xB677997777B6E89F),
+	SPH_C64(0xEB37593737EB92A5), SPH_C64(0x56E532E5E5569E7B),
+	SPH_C64(0xD99FBC9F9FD9138C), SPH_C64(0x17F00DF0F01723D3),
+	SPH_C64(0x7F4ADE4A4A7F206A), SPH_C64(0x95DA73DADA95449E),
+	SPH_C64(0x2558E8585825A2FA), SPH_C64(0xCAC946C9C9CACF06),
+	SPH_C64(0x8D297B29298D7C55), SPH_C64(0x220A1E0A0A225A50),
+	SPH_C64(0x4FB1CEB1B14F50E1), SPH_C64(0x1AA0FDA0A01AC969),
+	SPH_C64(0xDA6BBD6B6BDA147F), SPH_C64(0xAB85928585ABD95C),
+	SPH_C64(0x73BDDABDBD733C81), SPH_C64(0x345DE75D5D348FD2),
+	SPH_C64(0x5010301010509080), SPH_C64(0x03F401F4F40307F3),
+	SPH_C64(0xC0CB40CBCBC0DD16), SPH_C64(0xC63E423E3EC6D3ED),
+	SPH_C64(0x11050F0505112D28), SPH_C64(0xE667A96767E6781F),
+	SPH_C64(0x53E431E4E4539773), SPH_C64(0xBB27692727BB0225),
+	SPH_C64(0x5841C34141587332), SPH_C64(0x9D8B808B8B9DA72C),
+	SPH_C64(0x01A7F4A7A701F651), SPH_C64(0x947D877D7D94B2CF),
+	SPH_C64(0xFB95A29595FB49DC), SPH_C64(0x9FD875D8D89F568E),
+	SPH_C64(0x30FB10FBFB30708B), SPH_C64(0x71EE2FEEEE71CD23),
+	SPH_C64(0x917C847C7C91BBC7), SPH_C64(0xE366AA6666E37117),
+	SPH_C64(0x8EDD7ADDDD8E7BA6), SPH_C64(0x4B173917174BAFB8),
+	SPH_C64(0x4647C94747464502), SPH_C64(0xDC9EBF9E9EDC1A84),
+	SPH_C64(0xC5CA43CACAC5D41E), SPH_C64(0x992D772D2D995875),
+	SPH_C64(0x79BFDCBFBF792E91), SPH_C64(0x1B070907071B3F38),
+	SPH_C64(0x23ADEAADAD23AC01), SPH_C64(0x2F5AEE5A5A2FB0EA),
+	SPH_C64(0xB583988383B5EF6C), SPH_C64(0xFF33553333FFB685),
+	SPH_C64(0xF263A56363F25C3F), SPH_C64(0x0A020602020A1210),
+	SPH_C64(0x38AAE3AAAA389339), SPH_C64(0xA871937171A8DEAF),
+	SPH_C64(0xCFC845C8C8CFC60E), SPH_C64(0x7D192B19197DD1C8),
+	SPH_C64(0x7049DB4949703B72), SPH_C64(0x9AD976D9D99A5F86),
+	SPH_C64(0x1DF20BF2F21D31C3), SPH_C64(0x48E338E3E348A84B),
+	SPH_C64(0x2A5BED5B5B2AB9E2), SPH_C64(0x928885888892BC34),
+	SPH_C64(0xC89AB39A9AC83EA4), SPH_C64(0xBE266A2626BE0B2D),
+	SPH_C64(0xFA32563232FABF8D), SPH_C64(0x4AB0CDB0B04A59E9),
+	SPH_C64(0x6AE926E9E96AF21B), SPH_C64(0x330F110F0F337778),
+	SPH_C64(0xA6D562D5D5A633E6), SPH_C64(0xBA809D8080BAF474),
+	SPH_C64(0x7CBEDFBEBE7C2799), SPH_C64(0xDECD4ACDCDDEEB26),
+	SPH_C64(0xE4345C3434E489BD), SPH_C64(0x7548D8484875327A),
+	SPH_C64(0x24FF1CFFFF2454AB), SPH_C64(0x8F7A8E7A7A8F8DF7),
+	SPH_C64(0xEA90AD9090EA64F4), SPH_C64(0x3E5FE15F5F3E9DC2),
+	SPH_C64(0xA020602020A03D1D), SPH_C64(0xD568B86868D50F67),
+	SPH_C64(0x721A2E1A1A72CAD0), SPH_C64(0x2CAEEFAEAE2CB719),
+	SPH_C64(0x5EB4C1B4B45E7DC9), SPH_C64(0x1954FC545419CE9A),
+	SPH_C64(0xE593A89393E57FEC), SPH_C64(0xAA22662222AA2F0D),
+	SPH_C64(0xE964AC6464E96307), SPH_C64(0x12F10EF1F1122ADB),
+	SPH_C64(0xA273957373A2CCBF), SPH_C64(0x5A123612125A8290),
+	SPH_C64(0x5D40C040405D7A3A), SPH_C64(0x2808180808284840),
+	SPH_C64(0xE8C358C3C3E89556), SPH_C64(0x7BEC29ECEC7BDF33),
+	SPH_C64(0x90DB70DBDB904D96), SPH_C64(0x1FA1FEA1A11FC061),
+	SPH_C64(0x838D8A8D8D83911C), SPH_C64(0xC93D473D3DC9C8F5),
+	SPH_C64(0xF197A49797F15BCC), SPH_C64(0x0000000000000000),
+	SPH_C64(0xD4CF4CCFCFD4F936), SPH_C64(0x872B7D2B2B876E45),
+	SPH_C64(0xB3769A7676B3E197), SPH_C64(0xB0829B8282B0E664),
+	SPH_C64(0xA9D667D6D6A928FE), SPH_C64(0x771B2D1B1B77C3D8),
+	SPH_C64(0x5BB5C2B5B55B74C1), SPH_C64(0x29AFECAFAF29BE11),
+	SPH_C64(0xDF6ABE6A6ADF1D77), SPH_C64(0x0D50F050500DEABA),
+	SPH_C64(0x4C45CF45454C5712), SPH_C64(0x18F308F3F31838CB),
+	SPH_C64(0xF030503030F0AD9D), SPH_C64(0x74EF2CEFEF74C42B),
+	SPH_C64(0xC33F413F3FC3DAE5), SPH_C64(0x1C55FF55551CC792),
+	SPH_C64(0x10A2FBA2A210DB79), SPH_C64(0x65EA23EAEA65E903),
+	SPH_C64(0xEC65AF6565EC6A0F), SPH_C64(0x68BAD3BABA6803B9),
+	SPH_C64(0x932F712F2F934A65), SPH_C64(0xE7C05DC0C0E78E4E),
+	SPH_C64(0x81DE7FDEDE8160BE), SPH_C64(0x6C1C241C1C6CFCE0),
+	SPH_C64(0x2EFD1AFDFD2E46BB), SPH_C64(0x644DD74D4D641F52),
+	SPH_C64(0xE092AB9292E076E4), SPH_C64(0xBC759F7575BCFA8F),
+	SPH_C64(0x1E060A06061E3630), SPH_C64(0x988A838A8A98AE24),
+	SPH_C64(0x40B2CBB2B2404BF9), SPH_C64(0x59E637E6E6598563),
+	SPH_C64(0x360E120E0E367E70), SPH_C64(0x631F211F1F63E7F8),
+	SPH_C64(0xF762A66262F75537), SPH_C64(0xA3D461D4D4A33AEE),
+	SPH_C64(0x32A8E5A8A8328129), SPH_C64(0xF496A79696F452C4),
+	SPH_C64(0x3AF916F9F93A629B), SPH_C64(0xF6C552C5C5F6A366),
+	SPH_C64(0xB1256F2525B11035), SPH_C64(0x2059EB595920ABF2),
+	SPH_C64(0xAE84918484AED054), SPH_C64(0xA772967272A7C5B7),
+	SPH_C64(0xDD394B3939DDECD5), SPH_C64(0x614CD44C4C61165A),
+	SPH_C64(0x3B5EE25E5E3B94CA), SPH_C64(0x8578887878859FE7),
+	SPH_C64(0xD838483838D8E5DD), SPH_C64(0x868C898C8C869814),
+	SPH_C64(0xB2D16ED1D1B217C6), SPH_C64(0x0BA5F2A5A50BE441),
+	SPH_C64(0x4DE23BE2E24DA143), SPH_C64(0xF861A36161F84E2F),
+	SPH_C64(0x45B3C8B3B34542F1), SPH_C64(0xA521632121A53415),
+	SPH_C64(0xD69CB99C9CD60894), SPH_C64(0x661E221E1E66EEF0),
+	SPH_C64(0x5243C54343526122), SPH_C64(0xFCC754C7C7FCB176),
+	SPH_C64(0x2BFC19FCFC2B4FB3), SPH_C64(0x14040C0404142420),
+	SPH_C64(0x0851F3515108E3B2), SPH_C64(0xC799B69999C725BC),
+	SPH_C64(0xC46DB76D6DC4224F), SPH_C64(0x390D170D0D396568),
+	SPH_C64(0x35FA13FAFA357983), SPH_C64(0x84DF7CDFDF8469B6),
+	SPH_C64(0x9B7E827E7E9BA9D7), SPH_C64(0xB4246C2424B4193D),
+	SPH_C64(0xD73B4D3B3BD7FEC5), SPH_C64(0x3DABE0ABAB3D9A31),
+	SPH_C64(0xD1CE4FCECED1F03E), SPH_C64(0x5511331111559988),
+	SPH_C64(0x898F8C8F8F89830C), SPH_C64(0x6B4ED24E4E6B044A),
+	SPH_C64(0x51B7C4B7B75166D1), SPH_C64(0x60EB20EBEB60E00B),
+	SPH_C64(0xCC3C443C3CCCC1FD), SPH_C64(0xBF819E8181BFFD7C),
+	SPH_C64(0xFE94A19494FE40D4), SPH_C64(0x0CF704F7F70C1CEB),
+	SPH_C64(0x67B9D6B9B96718A1), SPH_C64(0x5F133513135F8B98),
+	SPH_C64(0x9C2C742C2C9C517D), SPH_C64(0xB8D368D3D3B805D6),
+	SPH_C64(0x5CE734E7E75C8C6B), SPH_C64(0xCB6EB26E6ECB3957),
+	SPH_C64(0xF3C451C4C4F3AA6E), SPH_C64(0x0F030503030F1B18),
+	SPH_C64(0x1356FA565613DC8A), SPH_C64(0x4944CC4444495E1A),
+	SPH_C64(0x9E7F817F7F9EA0DF), SPH_C64(0x37A9E6A9A9378821),
+	SPH_C64(0x822A7E2A2A82674D), SPH_C64(0x6DBBD0BBBB6D0AB1),
+	SPH_C64(0xE2C15EC1C1E28746), SPH_C64(0x0253F5535302F1A2),
+	SPH_C64(0x8BDC79DCDC8B72AE), SPH_C64(0x270B1D0B0B275358),
+	SPH_C64(0xD39DBA9D9DD3019C), SPH_C64(0xC16CB46C6CC12B47),
+	SPH_C64(0xF531533131F5A495), SPH_C64(0xB9749C7474B9F387),
+	SPH_C64(0x09F607F6F60915E3), SPH_C64(0x4346CA4646434C0A),
+	SPH_C64(0x26ACE9ACAC26A509), SPH_C64(0x978986898997B53C),
+	SPH_C64(0x44143C141444B4A0), SPH_C64(0x42E13EE1E142BA5B),
+	SPH_C64(0x4E163A16164EA6B0), SPH_C64(0xD23A4E3A3AD2F7CD),
+	SPH_C64(0xD069BB6969D0066F), SPH_C64(0x2D091B09092D4148),
+	SPH_C64(0xAD70907070ADD7A7), SPH_C64(0x54B6C7B6B6546FD9),
+	SPH_C64(0xB7D06DD0D0B71ECE), SPH_C64(0x7EED2AEDED7ED63B),
+	SPH_C64(0xDBCC49CCCCDBE22E), SPH_C64(0x5742C6424257682A),
+	SPH_C64(0xC298B59898C22CB4), SPH_C64(0x0EA4F1A4A40EED49),
+	SPH_C64(0x882878282888755D), SPH_C64(0x315CE45C5C3186DA),
+	SPH_C64(0x3FF815F8F83F6B93), SPH_C64(0xA486978686A4C244)
+};
+
+static const uint64_t old1_T4[256] = {
+	SPH_C64(0x1828181878D8C078), SPH_C64(0x23652323AF2605AF),
+	SPH_C64(0xC657C6C6F9B87EF9), SPH_C64(0xE825E8E86FFB136F),
+	SPH_C64(0x87948787A1CB4CA1), SPH_C64(0xB8D5B8B86211A962),
+	SPH_C64(0x0103010105090805), SPH_C64(0x4FD14F4F6E0D426E),
+	SPH_C64(0x365A3636EE9BADEE), SPH_C64(0xA6F7A6A604FF5904),
+	SPH_C64(0xD26BD2D2BD0CDEBD), SPH_C64(0xF502F5F5060EFB06),
+	SPH_C64(0x798B79798096EF80), SPH_C64(0x6FB16F6FCE305FCE),
+	SPH_C64(0x91AE9191EF6DFCEF), SPH_C64(0x52F6525207F8AA07),
+	SPH_C64(0x60A06060FD4727FD), SPH_C64(0xBCD9BCBC76358976),
+	SPH_C64(0x9BB09B9BCD37ACCD), SPH_C64(0x8E8F8E8E8C8A048C),
+	SPH_C64(0xA3F8A3A315D27115), SPH_C64(0x0C140C0C3C6C603C),
+	SPH_C64(0x7B8D7B7B8A84FF8A), SPH_C64(0x355F3535E180B5E1),
+	SPH_C64(0x1D271D1D69F5E869), SPH_C64(0xE03DE0E047B35347),
+	SPH_C64(0xD764D7D7AC21F6AC), SPH_C64(0xC25BC2C2ED9C5EED),
+	SPH_C64(0x2E722E2E96436D96), SPH_C64(0x4BDD4B4B7A29627A),
+	SPH_C64(0xFE1FFEFE215DA321), SPH_C64(0x57F9575716D58216),
+	SPH_C64(0x153F151541BDA841), SPH_C64(0x77997777B6E89FB6),
+	SPH_C64(0x37593737EB92A5EB), SPH_C64(0xE532E5E5569E7B56),
+	SPH_C64(0x9FBC9F9FD9138CD9), SPH_C64(0xF00DF0F01723D317),
+	SPH_C64(0x4ADE4A4A7F206A7F), SPH_C64(0xDA73DADA95449E95),
+	SPH_C64(0x58E8585825A2FA25), SPH_C64(0xC946C9C9CACF06CA),
+	SPH_C64(0x297B29298D7C558D), SPH_C64(0x0A1E0A0A225A5022),
+	SPH_C64(0xB1CEB1B14F50E14F), SPH_C64(0xA0FDA0A01AC9691A),
+	SPH_C64(0x6BBD6B6BDA147FDA), SPH_C64(0x85928585ABD95CAB),
+	SPH_C64(0xBDDABDBD733C8173), SPH_C64(0x5DE75D5D348FD234),
+	SPH_C64(0x1030101050908050), SPH_C64(0xF401F4F40307F303),
+	SPH_C64(0xCB40CBCBC0DD16C0), SPH_C64(0x3E423E3EC6D3EDC6),
+	SPH_C64(0x050F0505112D2811), SPH_C64(0x67A96767E6781FE6),
+	SPH_C64(0xE431E4E453977353), SPH_C64(0x27692727BB0225BB),
+	SPH_C64(0x41C3414158733258), SPH_C64(0x8B808B8B9DA72C9D),
+	SPH_C64(0xA7F4A7A701F65101), SPH_C64(0x7D877D7D94B2CF94),
+	SPH_C64(0x95A29595FB49DCFB), SPH_C64(0xD875D8D89F568E9F),
+	SPH_C64(0xFB10FBFB30708B30), SPH_C64(0xEE2FEEEE71CD2371),
+	SPH_C64(0x7C847C7C91BBC791), SPH_C64(0x66AA6666E37117E3),
+	SPH_C64(0xDD7ADDDD8E7BA68E), SPH_C64(0x173917174BAFB84B),
+	SPH_C64(0x47C9474746450246), SPH_C64(0x9EBF9E9EDC1A84DC),
+	SPH_C64(0xCA43CACAC5D41EC5), SPH_C64(0x2D772D2D99587599),
+	SPH_C64(0xBFDCBFBF792E9179), SPH_C64(0x070907071B3F381B),
+	SPH_C64(0xADEAADAD23AC0123), SPH_C64(0x5AEE5A5A2FB0EA2F),
+	SPH_C64(0x83988383B5EF6CB5), SPH_C64(0x33553333FFB685FF),
+	SPH_C64(0x63A56363F25C3FF2), SPH_C64(0x020602020A12100A),
+	SPH_C64(0xAAE3AAAA38933938), SPH_C64(0x71937171A8DEAFA8),
+	SPH_C64(0xC845C8C8CFC60ECF), SPH_C64(0x192B19197DD1C87D),
+	SPH_C64(0x49DB4949703B7270), SPH_C64(0xD976D9D99A5F869A),
+	SPH_C64(0xF20BF2F21D31C31D), SPH_C64(0xE338E3E348A84B48),
+	SPH_C64(0x5BED5B5B2AB9E22A), SPH_C64(0x8885888892BC3492),
+	SPH_C64(0x9AB39A9AC83EA4C8), SPH_C64(0x266A2626BE0B2DBE),
+	SPH_C64(0x32563232FABF8DFA), SPH_C64(0xB0CDB0B04A59E94A),
+	SPH_C64(0xE926E9E96AF21B6A), SPH_C64(0x0F110F0F33777833),
+	SPH_C64(0xD562D5D5A633E6A6), SPH_C64(0x809D8080BAF474BA),
+	SPH_C64(0xBEDFBEBE7C27997C), SPH_C64(0xCD4ACDCDDEEB26DE),
+	SPH_C64(0x345C3434E489BDE4), SPH_C64(0x48D8484875327A75),
+	SPH_C64(0xFF1CFFFF2454AB24), SPH_C64(0x7A8E7A7A8F8DF78F),
+	SPH_C64(0x90AD9090EA64F4EA), SPH_C64(0x5FE15F5F3E9DC23E),
+	SPH_C64(0x20602020A03D1DA0), SPH_C64(0x68B86868D50F67D5),
+	SPH_C64(0x1A2E1A1A72CAD072), SPH_C64(0xAEEFAEAE2CB7192C),
+	SPH_C64(0xB4C1B4B45E7DC95E), SPH_C64(0x54FC545419CE9A19),
+	SPH_C64(0x93A89393E57FECE5), SPH_C64(0x22662222AA2F0DAA),
+	SPH_C64(0x64AC6464E96307E9), SPH_C64(0xF10EF1F1122ADB12),
+	SPH_C64(0x73957373A2CCBFA2), SPH_C64(0x123612125A82905A),
+	SPH_C64(0x40C040405D7A3A5D), SPH_C64(0x0818080828484028),
+	SPH_C64(0xC358C3C3E89556E8), SPH_C64(0xEC29ECEC7BDF337B),
+	SPH_C64(0xDB70DBDB904D9690), SPH_C64(0xA1FEA1A11FC0611F),
+	SPH_C64(0x8D8A8D8D83911C83), SPH_C64(0x3D473D3DC9C8F5C9),
+	SPH_C64(0x97A49797F15BCCF1), SPH_C64(0x0000000000000000),
+	SPH_C64(0xCF4CCFCFD4F936D4), SPH_C64(0x2B7D2B2B876E4587),
+	SPH_C64(0x769A7676B3E197B3), SPH_C64(0x829B8282B0E664B0),
+	SPH_C64(0xD667D6D6A928FEA9), SPH_C64(0x1B2D1B1B77C3D877),
+	SPH_C64(0xB5C2B5B55B74C15B), SPH_C64(0xAFECAFAF29BE1129),
+	SPH_C64(0x6ABE6A6ADF1D77DF), SPH_C64(0x50F050500DEABA0D),
+	SPH_C64(0x45CF45454C57124C), SPH_C64(0xF308F3F31838CB18),
+	SPH_C64(0x30503030F0AD9DF0), SPH_C64(0xEF2CEFEF74C42B74),
+	SPH_C64(0x3F413F3FC3DAE5C3), SPH_C64(0x55FF55551CC7921C),
+	SPH_C64(0xA2FBA2A210DB7910), SPH_C64(0xEA23EAEA65E90365),
+	SPH_C64(0x65AF6565EC6A0FEC), SPH_C64(0xBAD3BABA6803B968),
+	SPH_C64(0x2F712F2F934A6593), SPH_C64(0xC05DC0C0E78E4EE7),
+	SPH_C64(0xDE7FDEDE8160BE81), SPH_C64(0x1C241C1C6CFCE06C),
+	SPH_C64(0xFD1AFDFD2E46BB2E), SPH_C64(0x4DD74D4D641F5264),
+	SPH_C64(0x92AB9292E076E4E0), SPH_C64(0x759F7575BCFA8FBC),
+	SPH_C64(0x060A06061E36301E), SPH_C64(0x8A838A8A98AE2498),
+	SPH_C64(0xB2CBB2B2404BF940), SPH_C64(0xE637E6E659856359),
+	SPH_C64(0x0E120E0E367E7036), SPH_C64(0x1F211F1F63E7F863),
+	SPH_C64(0x62A66262F75537F7), SPH_C64(0xD461D4D4A33AEEA3),
+	SPH_C64(0xA8E5A8A832812932), SPH_C64(0x96A79696F452C4F4),
+	SPH_C64(0xF916F9F93A629B3A), SPH_C64(0xC552C5C5F6A366F6),
+	SPH_C64(0x256F2525B11035B1), SPH_C64(0x59EB595920ABF220),
+	SPH_C64(0x84918484AED054AE), SPH_C64(0x72967272A7C5B7A7),
+	SPH_C64(0x394B3939DDECD5DD), SPH_C64(0x4CD44C4C61165A61),
+	SPH_C64(0x5EE25E5E3B94CA3B), SPH_C64(0x78887878859FE785),
+	SPH_C64(0x38483838D8E5DDD8), SPH_C64(0x8C898C8C86981486),
+	SPH_C64(0xD16ED1D1B217C6B2), SPH_C64(0xA5F2A5A50BE4410B),
+	SPH_C64(0xE23BE2E24DA1434D), SPH_C64(0x61A36161F84E2FF8),
+	SPH_C64(0xB3C8B3B34542F145), SPH_C64(0x21632121A53415A5),
+	SPH_C64(0x9CB99C9CD60894D6), SPH_C64(0x1E221E1E66EEF066),
+	SPH_C64(0x43C5434352612252), SPH_C64(0xC754C7C7FCB176FC),
+	SPH_C64(0xFC19FCFC2B4FB32B), SPH_C64(0x040C040414242014),
+	SPH_C64(0x51F3515108E3B208), SPH_C64(0x99B69999C725BCC7),
+	SPH_C64(0x6DB76D6DC4224FC4), SPH_C64(0x0D170D0D39656839),
+	SPH_C64(0xFA13FAFA35798335), SPH_C64(0xDF7CDFDF8469B684),
+	SPH_C64(0x7E827E7E9BA9D79B), SPH_C64(0x246C2424B4193DB4),
+	SPH_C64(0x3B4D3B3BD7FEC5D7), SPH_C64(0xABE0ABAB3D9A313D),
+	SPH_C64(0xCE4FCECED1F03ED1), SPH_C64(0x1133111155998855),
+	SPH_C64(0x8F8C8F8F89830C89), SPH_C64(0x4ED24E4E6B044A6B),
+	SPH_C64(0xB7C4B7B75166D151), SPH_C64(0xEB20EBEB60E00B60),
+	SPH_C64(0x3C443C3CCCC1FDCC), SPH_C64(0x819E8181BFFD7CBF),
+	SPH_C64(0x94A19494FE40D4FE), SPH_C64(0xF704F7F70C1CEB0C),
+	SPH_C64(0xB9D6B9B96718A167), SPH_C64(0x133513135F8B985F),
+	SPH_C64(0x2C742C2C9C517D9C), SPH_C64(0xD368D3D3B805D6B8),
+	SPH_C64(0xE734E7E75C8C6B5C), SPH_C64(0x6EB26E6ECB3957CB),
+	SPH_C64(0xC451C4C4F3AA6EF3), SPH_C64(0x030503030F1B180F),
+	SPH_C64(0x56FA565613DC8A13), SPH_C64(0x44CC4444495E1A49),
+	SPH_C64(0x7F817F7F9EA0DF9E), SPH_C64(0xA9E6A9A937882137),
+	SPH_C64(0x2A7E2A2A82674D82), SPH_C64(0xBBD0BBBB6D0AB16D),
+	SPH_C64(0xC15EC1C1E28746E2), SPH_C64(0x53F5535302F1A202),
+	SPH_C64(0xDC79DCDC8B72AE8B), SPH_C64(0x0B1D0B0B27535827),
+	SPH_C64(0x9DBA9D9DD3019CD3), SPH_C64(0x6CB46C6CC12B47C1),
+	SPH_C64(0x31533131F5A495F5), SPH_C64(0x749C7474B9F387B9),
+	SPH_C64(0xF607F6F60915E309), SPH_C64(0x46CA4646434C0A43),
+	SPH_C64(0xACE9ACAC26A50926), SPH_C64(0x8986898997B53C97),
+	SPH_C64(0x143C141444B4A044), SPH_C64(0xE13EE1E142BA5B42),
+	SPH_C64(0x163A16164EA6B04E), SPH_C64(0x3A4E3A3AD2F7CDD2),
+	SPH_C64(0x69BB6969D0066FD0), SPH_C64(0x091B09092D41482D),
+	SPH_C64(0x70907070ADD7A7AD), SPH_C64(0xB6C7B6B6546FD954),
+	SPH_C64(0xD06DD0D0B71ECEB7), SPH_C64(0xED2AEDED7ED63B7E),
+	SPH_C64(0xCC49CCCCDBE22EDB), SPH_C64(0x42C6424257682A57),
+	SPH_C64(0x98B59898C22CB4C2), SPH_C64(0xA4F1A4A40EED490E),
+	SPH_C64(0x2878282888755D88), SPH_C64(0x5CE45C5C3186DA31),
+	SPH_C64(0xF815F8F83F6B933F), SPH_C64(0x86978686A4C244A4)
+};
+
+static const uint64_t old1_T5[256] = {
+	SPH_C64(0x28181878D8C07818), SPH_C64(0x652323AF2605AF23),
+	SPH_C64(0x57C6C6F9B87EF9C6), SPH_C64(0x25E8E86FFB136FE8),
+	SPH_C64(0x948787A1CB4CA187), SPH_C64(0xD5B8B86211A962B8),
+	SPH_C64(0x0301010509080501), SPH_C64(0xD14F4F6E0D426E4F),
+	SPH_C64(0x5A3636EE9BADEE36), SPH_C64(0xF7A6A604FF5904A6),
+	SPH_C64(0x6BD2D2BD0CDEBDD2), SPH_C64(0x02F5F5060EFB06F5),
+	SPH_C64(0x8B79798096EF8079), SPH_C64(0xB16F6FCE305FCE6F),
+	SPH_C64(0xAE9191EF6DFCEF91), SPH_C64(0xF6525207F8AA0752),
+	SPH_C64(0xA06060FD4727FD60), SPH_C64(0xD9BCBC76358976BC),
+	SPH_C64(0xB09B9BCD37ACCD9B), SPH_C64(0x8F8E8E8C8A048C8E),
+	SPH_C64(0xF8A3A315D27115A3), SPH_C64(0x140C0C3C6C603C0C),
+	SPH_C64(0x8D7B7B8A84FF8A7B), SPH_C64(0x5F3535E180B5E135),
+	SPH_C64(0x271D1D69F5E8691D), SPH_C64(0x3DE0E047B35347E0),
+	SPH_C64(0x64D7D7AC21F6ACD7), SPH_C64(0x5BC2C2ED9C5EEDC2),
+	SPH_C64(0x722E2E96436D962E), SPH_C64(0xDD4B4B7A29627A4B),
+	SPH_C64(0x1FFEFE215DA321FE), SPH_C64(0xF9575716D5821657),
+	SPH_C64(0x3F151541BDA84115), SPH_C64(0x997777B6E89FB677),
+	SPH_C64(0x593737EB92A5EB37), SPH_C64(0x32E5E5569E7B56E5),
+	SPH_C64(0xBC9F9FD9138CD99F), SPH_C64(0x0DF0F01723D317F0),
+	SPH_C64(0xDE4A4A7F206A7F4A), SPH_C64(0x73DADA95449E95DA),
+	SPH_C64(0xE8585825A2FA2558), SPH_C64(0x46C9C9CACF06CAC9),
+	SPH_C64(0x7B29298D7C558D29), SPH_C64(0x1E0A0A225A50220A),
+	SPH_C64(0xCEB1B14F50E14FB1), SPH_C64(0xFDA0A01AC9691AA0),
+	SPH_C64(0xBD6B6BDA147FDA6B), SPH_C64(0x928585ABD95CAB85),
+	SPH_C64(0xDABDBD733C8173BD), SPH_C64(0xE75D5D348FD2345D),
+	SPH_C64(0x3010105090805010), SPH_C64(0x01F4F40307F303F4),
+	SPH_C64(0x40CBCBC0DD16C0CB), SPH_C64(0x423E3EC6D3EDC63E),
+	SPH_C64(0x0F0505112D281105), SPH_C64(0xA96767E6781FE667),
+	SPH_C64(0x31E4E453977353E4), SPH_C64(0x692727BB0225BB27),
+	SPH_C64(0xC341415873325841), SPH_C64(0x808B8B9DA72C9D8B),
+	SPH_C64(0xF4A7A701F65101A7), SPH_C64(0x877D7D94B2CF947D),
+	SPH_C64(0xA29595FB49DCFB95), SPH_C64(0x75D8D89F568E9FD8),
+	SPH_C64(0x10FBFB30708B30FB), SPH_C64(0x2FEEEE71CD2371EE),
+	SPH_C64(0x847C7C91BBC7917C), SPH_C64(0xAA6666E37117E366),
+	SPH_C64(0x7ADDDD8E7BA68EDD), SPH_C64(0x3917174BAFB84B17),
+	SPH_C64(0xC947474645024647), SPH_C64(0xBF9E9EDC1A84DC9E),
+	SPH_C64(0x43CACAC5D41EC5CA), SPH_C64(0x772D2D995875992D),
+	SPH_C64(0xDCBFBF792E9179BF), SPH_C64(0x0907071B3F381B07),
+	SPH_C64(0xEAADAD23AC0123AD), SPH_C64(0xEE5A5A2FB0EA2F5A),
+	SPH_C64(0x988383B5EF6CB583), SPH_C64(0x553333FFB685FF33),
+	SPH_C64(0xA56363F25C3FF263), SPH_C64(0x0602020A12100A02),
+	SPH_C64(0xE3AAAA38933938AA), SPH_C64(0x937171A8DEAFA871),
+	SPH_C64(0x45C8C8CFC60ECFC8), SPH_C64(0x2B19197DD1C87D19),
+	SPH_C64(0xDB4949703B727049), SPH_C64(0x76D9D99A5F869AD9),
+	SPH_C64(0x0BF2F21D31C31DF2), SPH_C64(0x38E3E348A84B48E3),
+	SPH_C64(0xED5B5B2AB9E22A5B), SPH_C64(0x85888892BC349288),
+	SPH_C64(0xB39A9AC83EA4C89A), SPH_C64(0x6A2626BE0B2DBE26),
+	SPH_C64(0x563232FABF8DFA32), SPH_C64(0xCDB0B04A59E94AB0),
+	SPH_C64(0x26E9E96AF21B6AE9), SPH_C64(0x110F0F337778330F),
+	SPH_C64(0x62D5D5A633E6A6D5), SPH_C64(0x9D8080BAF474BA80),
+	SPH_C64(0xDFBEBE7C27997CBE), SPH_C64(0x4ACDCDDEEB26DECD),
+	SPH_C64(0x5C3434E489BDE434), SPH_C64(0xD8484875327A7548),
+	SPH_C64(0x1CFFFF2454AB24FF), SPH_C64(0x8E7A7A8F8DF78F7A),
+	SPH_C64(0xAD9090EA64F4EA90), SPH_C64(0xE15F5F3E9DC23E5F),
+	SPH_C64(0x602020A03D1DA020), SPH_C64(0xB86868D50F67D568),
+	SPH_C64(0x2E1A1A72CAD0721A), SPH_C64(0xEFAEAE2CB7192CAE),
+	SPH_C64(0xC1B4B45E7DC95EB4), SPH_C64(0xFC545419CE9A1954),
+	SPH_C64(0xA89393E57FECE593), SPH_C64(0x662222AA2F0DAA22),
+	SPH_C64(0xAC6464E96307E964), SPH_C64(0x0EF1F1122ADB12F1),
+	SPH_C64(0x957373A2CCBFA273), SPH_C64(0x3612125A82905A12),
+	SPH_C64(0xC040405D7A3A5D40), SPH_C64(0x1808082848402808),
+	SPH_C64(0x58C3C3E89556E8C3), SPH_C64(0x29ECEC7BDF337BEC),
+	SPH_C64(0x70DBDB904D9690DB), SPH_C64(0xFEA1A11FC0611FA1),
+	SPH_C64(0x8A8D8D83911C838D), SPH_C64(0x473D3DC9C8F5C93D),
+	SPH_C64(0xA49797F15BCCF197), SPH_C64(0x0000000000000000),
+	SPH_C64(0x4CCFCFD4F936D4CF), SPH_C64(0x7D2B2B876E45872B),
+	SPH_C64(0x9A7676B3E197B376), SPH_C64(0x9B8282B0E664B082),
+	SPH_C64(0x67D6D6A928FEA9D6), SPH_C64(0x2D1B1B77C3D8771B),
+	SPH_C64(0xC2B5B55B74C15BB5), SPH_C64(0xECAFAF29BE1129AF),
+	SPH_C64(0xBE6A6ADF1D77DF6A), SPH_C64(0xF050500DEABA0D50),
+	SPH_C64(0xCF45454C57124C45), SPH_C64(0x08F3F31838CB18F3),
+	SPH_C64(0x503030F0AD9DF030), SPH_C64(0x2CEFEF74C42B74EF),
+	SPH_C64(0x413F3FC3DAE5C33F), SPH_C64(0xFF55551CC7921C55),
+	SPH_C64(0xFBA2A210DB7910A2), SPH_C64(0x23EAEA65E90365EA),
+	SPH_C64(0xAF6565EC6A0FEC65), SPH_C64(0xD3BABA6803B968BA),
+	SPH_C64(0x712F2F934A65932F), SPH_C64(0x5DC0C0E78E4EE7C0),
+	SPH_C64(0x7FDEDE8160BE81DE), SPH_C64(0x241C1C6CFCE06C1C),
+	SPH_C64(0x1AFDFD2E46BB2EFD), SPH_C64(0xD74D4D641F52644D),
+	SPH_C64(0xAB9292E076E4E092), SPH_C64(0x9F7575BCFA8FBC75),
+	SPH_C64(0x0A06061E36301E06), SPH_C64(0x838A8A98AE24988A),
+	SPH_C64(0xCBB2B2404BF940B2), SPH_C64(0x37E6E659856359E6),
+	SPH_C64(0x120E0E367E70360E), SPH_C64(0x211F1F63E7F8631F),
+	SPH_C64(0xA66262F75537F762), SPH_C64(0x61D4D4A33AEEA3D4),
+	SPH_C64(0xE5A8A832812932A8), SPH_C64(0xA79696F452C4F496),
+	SPH_C64(0x16F9F93A629B3AF9), SPH_C64(0x52C5C5F6A366F6C5),
+	SPH_C64(0x6F2525B11035B125), SPH_C64(0xEB595920ABF22059),
+	SPH_C64(0x918484AED054AE84), SPH_C64(0x967272A7C5B7A772),
+	SPH_C64(0x4B3939DDECD5DD39), SPH_C64(0xD44C4C61165A614C),
+	SPH_C64(0xE25E5E3B94CA3B5E), SPH_C64(0x887878859FE78578),
+	SPH_C64(0x483838D8E5DDD838), SPH_C64(0x898C8C869814868C),
+	SPH_C64(0x6ED1D1B217C6B2D1), SPH_C64(0xF2A5A50BE4410BA5),
+	SPH_C64(0x3BE2E24DA1434DE2), SPH_C64(0xA36161F84E2FF861),
+	SPH_C64(0xC8B3B34542F145B3), SPH_C64(0x632121A53415A521),
+	SPH_C64(0xB99C9CD60894D69C), SPH_C64(0x221E1E66EEF0661E),
+	SPH_C64(0xC543435261225243), SPH_C64(0x54C7C7FCB176FCC7),
+	SPH_C64(0x19FCFC2B4FB32BFC), SPH_C64(0x0C04041424201404),
+	SPH_C64(0xF3515108E3B20851), SPH_C64(0xB69999C725BCC799),
+	SPH_C64(0xB76D6DC4224FC46D), SPH_C64(0x170D0D396568390D),
+	SPH_C64(0x13FAFA35798335FA), SPH_C64(0x7CDFDF8469B684DF),
+	SPH_C64(0x827E7E9BA9D79B7E), SPH_C64(0x6C2424B4193DB424),
+	SPH_C64(0x4D3B3BD7FEC5D73B), SPH_C64(0xE0ABAB3D9A313DAB),
+	SPH_C64(0x4FCECED1F03ED1CE), SPH_C64(0x3311115599885511),
+	SPH_C64(0x8C8F8F89830C898F), SPH_C64(0xD24E4E6B044A6B4E),
+	SPH_C64(0xC4B7B75166D151B7), SPH_C64(0x20EBEB60E00B60EB),
+	SPH_C64(0x443C3CCCC1FDCC3C), SPH_C64(0x9E8181BFFD7CBF81),
+	SPH_C64(0xA19494FE40D4FE94), SPH_C64(0x04F7F70C1CEB0CF7),
+	SPH_C64(0xD6B9B96718A167B9), SPH_C64(0x3513135F8B985F13),
+	SPH_C64(0x742C2C9C517D9C2C), SPH_C64(0x68D3D3B805D6B8D3),
+	SPH_C64(0x34E7E75C8C6B5CE7), SPH_C64(0xB26E6ECB3957CB6E),
+	SPH_C64(0x51C4C4F3AA6EF3C4), SPH_C64(0x0503030F1B180F03),
+	SPH_C64(0xFA565613DC8A1356), SPH_C64(0xCC4444495E1A4944),
+	SPH_C64(0x817F7F9EA0DF9E7F), SPH_C64(0xE6A9A937882137A9),
+	SPH_C64(0x7E2A2A82674D822A), SPH_C64(0xD0BBBB6D0AB16DBB),
+	SPH_C64(0x5EC1C1E28746E2C1), SPH_C64(0xF5535302F1A20253),
+	SPH_C64(0x79DCDC8B72AE8BDC), SPH_C64(0x1D0B0B275358270B),
+	SPH_C64(0xBA9D9DD3019CD39D), SPH_C64(0xB46C6CC12B47C16C),
+	SPH_C64(0x533131F5A495F531), SPH_C64(0x9C7474B9F387B974),
+	SPH_C64(0x07F6F60915E309F6), SPH_C64(0xCA4646434C0A4346),
+	SPH_C64(0xE9ACAC26A50926AC), SPH_C64(0x86898997B53C9789),
+	SPH_C64(0x3C141444B4A04414), SPH_C64(0x3EE1E142BA5B42E1),
+	SPH_C64(0x3A16164EA6B04E16), SPH_C64(0x4E3A3AD2F7CDD23A),
+	SPH_C64(0xBB6969D0066FD069), SPH_C64(0x1B09092D41482D09),
+	SPH_C64(0x907070ADD7A7AD70), SPH_C64(0xC7B6B6546FD954B6),
+	SPH_C64(0x6DD0D0B71ECEB7D0), SPH_C64(0x2AEDED7ED63B7EED),
+	SPH_C64(0x49CCCCDBE22EDBCC), SPH_C64(0xC6424257682A5742),
+	SPH_C64(0xB59898C22CB4C298), SPH_C64(0xF1A4A40EED490EA4),
+	SPH_C64(0x78282888755D8828), SPH_C64(0xE45C5C3186DA315C),
+	SPH_C64(0x15F8F83F6B933FF8), SPH_C64(0x978686A4C244A486)
+};
+
+static const uint64_t old1_T6[256] = {
+	SPH_C64(0x181878D8C0781828), SPH_C64(0x2323AF2605AF2365),
+	SPH_C64(0xC6C6F9B87EF9C657), SPH_C64(0xE8E86FFB136FE825),
+	SPH_C64(0x8787A1CB4CA18794), SPH_C64(0xB8B86211A962B8D5),
+	SPH_C64(0x0101050908050103), SPH_C64(0x4F4F6E0D426E4FD1),
+	SPH_C64(0x3636EE9BADEE365A), SPH_C64(0xA6A604FF5904A6F7),
+	SPH_C64(0xD2D2BD0CDEBDD26B), SPH_C64(0xF5F5060EFB06F502),
+	SPH_C64(0x79798096EF80798B), SPH_C64(0x6F6FCE305FCE6FB1),
+	SPH_C64(0x9191EF6DFCEF91AE), SPH_C64(0x525207F8AA0752F6),
+	SPH_C64(0x6060FD4727FD60A0), SPH_C64(0xBCBC76358976BCD9),
+	SPH_C64(0x9B9BCD37ACCD9BB0), SPH_C64(0x8E8E8C8A048C8E8F),
+	SPH_C64(0xA3A315D27115A3F8), SPH_C64(0x0C0C3C6C603C0C14),
+	SPH_C64(0x7B7B8A84FF8A7B8D), SPH_C64(0x3535E180B5E1355F),
+	SPH_C64(0x1D1D69F5E8691D27), SPH_C64(0xE0E047B35347E03D),
+	SPH_C64(0xD7D7AC21F6ACD764), SPH_C64(0xC2C2ED9C5EEDC25B),
+	SPH_C64(0x2E2E96436D962E72), SPH_C64(0x4B4B7A29627A4BDD),
+	SPH_C64(0xFEFE215DA321FE1F), SPH_C64(0x575716D5821657F9),
+	SPH_C64(0x151541BDA841153F), SPH_C64(0x7777B6E89FB67799),
+	SPH_C64(0x3737EB92A5EB3759), SPH_C64(0xE5E5569E7B56E532),
+	SPH_C64(0x9F9FD9138CD99FBC), SPH_C64(0xF0F01723D317F00D),
+	SPH_C64(0x4A4A7F206A7F4ADE), SPH_C64(0xDADA95449E95DA73),
+	SPH_C64(0x585825A2FA2558E8), SPH_C64(0xC9C9CACF06CAC946),
+	SPH_C64(0x29298D7C558D297B), SPH_C64(0x0A0A225A50220A1E),
+	SPH_C64(0xB1B14F50E14FB1CE), SPH_C64(0xA0A01AC9691AA0FD),
+	SPH_C64(0x6B6BDA147FDA6BBD), SPH_C64(0x8585ABD95CAB8592),
+	SPH_C64(0xBDBD733C8173BDDA), SPH_C64(0x5D5D348FD2345DE7),
+	SPH_C64(0x1010509080501030), SPH_C64(0xF4F40307F303F401),
+	SPH_C64(0xCBCBC0DD16C0CB40), SPH_C64(0x3E3EC6D3EDC63E42),
+	SPH_C64(0x0505112D2811050F), SPH_C64(0x6767E6781FE667A9),
+	SPH_C64(0xE4E453977353E431), SPH_C64(0x2727BB0225BB2769),
+	SPH_C64(0x41415873325841C3), SPH_C64(0x8B8B9DA72C9D8B80),
+	SPH_C64(0xA7A701F65101A7F4), SPH_C64(0x7D7D94B2CF947D87),
+	SPH_C64(0x9595FB49DCFB95A2), SPH_C64(0xD8D89F568E9FD875),
+	SPH_C64(0xFBFB30708B30FB10), SPH_C64(0xEEEE71CD2371EE2F),
+	SPH_C64(0x7C7C91BBC7917C84), SPH_C64(0x6666E37117E366AA),
+	SPH_C64(0xDDDD8E7BA68EDD7A), SPH_C64(0x17174BAFB84B1739),
+	SPH_C64(0x47474645024647C9), SPH_C64(0x9E9EDC1A84DC9EBF),
+	SPH_C64(0xCACAC5D41EC5CA43), SPH_C64(0x2D2D995875992D77),
+	SPH_C64(0xBFBF792E9179BFDC), SPH_C64(0x07071B3F381B0709),
+	SPH_C64(0xADAD23AC0123ADEA), SPH_C64(0x5A5A2FB0EA2F5AEE),
+	SPH_C64(0x8383B5EF6CB58398), SPH_C64(0x3333FFB685FF3355),
+	SPH_C64(0x6363F25C3FF263A5), SPH_C64(0x02020A12100A0206),
+	SPH_C64(0xAAAA38933938AAE3), SPH_C64(0x7171A8DEAFA87193),
+	SPH_C64(0xC8C8CFC60ECFC845), SPH_C64(0x19197DD1C87D192B),
+	SPH_C64(0x4949703B727049DB), SPH_C64(0xD9D99A5F869AD976),
+	SPH_C64(0xF2F21D31C31DF20B), SPH_C64(0xE3E348A84B48E338),
+	SPH_C64(0x5B5B2AB9E22A5BED), SPH_C64(0x888892BC34928885),
+	SPH_C64(0x9A9AC83EA4C89AB3), SPH_C64(0x2626BE0B2DBE266A),
+	SPH_C64(0x3232FABF8DFA3256), SPH_C64(0xB0B04A59E94AB0CD),
+	SPH_C64(0xE9E96AF21B6AE926), SPH_C64(0x0F0F337778330F11),
+	SPH_C64(0xD5D5A633E6A6D562), SPH_C64(0x8080BAF474BA809D),
+	SPH_C64(0xBEBE7C27997CBEDF), SPH_C64(0xCDCDDEEB26DECD4A),
+	SPH_C64(0x3434E489BDE4345C), SPH_C64(0x484875327A7548D8),
+	SPH_C64(0xFFFF2454AB24FF1C), SPH_C64(0x7A7A8F8DF78F7A8E),
+	SPH_C64(0x9090EA64F4EA90AD), SPH_C64(0x5F5F3E9DC23E5FE1),
+	SPH_C64(0x2020A03D1DA02060), SPH_C64(0x6868D50F67D568B8),
+	SPH_C64(0x1A1A72CAD0721A2E), SPH_C64(0xAEAE2CB7192CAEEF),
+	SPH_C64(0xB4B45E7DC95EB4C1), SPH_C64(0x545419CE9A1954FC),
+	SPH_C64(0x9393E57FECE593A8), SPH_C64(0x2222AA2F0DAA2266),
+	SPH_C64(0x6464E96307E964AC), SPH_C64(0xF1F1122ADB12F10E),
+	SPH_C64(0x7373A2CCBFA27395), SPH_C64(0x12125A82905A1236),
+	SPH_C64(0x40405D7A3A5D40C0), SPH_C64(0x0808284840280818),
+	SPH_C64(0xC3C3E89556E8C358), SPH_C64(0xECEC7BDF337BEC29),
+	SPH_C64(0xDBDB904D9690DB70), SPH_C64(0xA1A11FC0611FA1FE),
+	SPH_C64(0x8D8D83911C838D8A), SPH_C64(0x3D3DC9C8F5C93D47),
+	SPH_C64(0x9797F15BCCF197A4), SPH_C64(0x0000000000000000),
+	SPH_C64(0xCFCFD4F936D4CF4C), SPH_C64(0x2B2B876E45872B7D),
+	SPH_C64(0x7676B3E197B3769A), SPH_C64(0x8282B0E664B0829B),
+	SPH_C64(0xD6D6A928FEA9D667), SPH_C64(0x1B1B77C3D8771B2D),
+	SPH_C64(0xB5B55B74C15BB5C2), SPH_C64(0xAFAF29BE1129AFEC),
+	SPH_C64(0x6A6ADF1D77DF6ABE), SPH_C64(0x50500DEABA0D50F0),
+	SPH_C64(0x45454C57124C45CF), SPH_C64(0xF3F31838CB18F308),
+	SPH_C64(0x3030F0AD9DF03050), SPH_C64(0xEFEF74C42B74EF2C),
+	SPH_C64(0x3F3FC3DAE5C33F41), SPH_C64(0x55551CC7921C55FF),
+	SPH_C64(0xA2A210DB7910A2FB), SPH_C64(0xEAEA65E90365EA23),
+	SPH_C64(0x6565EC6A0FEC65AF), SPH_C64(0xBABA6803B968BAD3),
+	SPH_C64(0x2F2F934A65932F71), SPH_C64(0xC0C0E78E4EE7C05D),
+	SPH_C64(0xDEDE8160BE81DE7F), SPH_C64(0x1C1C6CFCE06C1C24),
+	SPH_C64(0xFDFD2E46BB2EFD1A), SPH_C64(0x4D4D641F52644DD7),
+	SPH_C64(0x9292E076E4E092AB), SPH_C64(0x7575BCFA8FBC759F),
+	SPH_C64(0x06061E36301E060A), SPH_C64(0x8A8A98AE24988A83),
+	SPH_C64(0xB2B2404BF940B2CB), SPH_C64(0xE6E659856359E637),
+	SPH_C64(0x0E0E367E70360E12), SPH_C64(0x1F1F63E7F8631F21),
+	SPH_C64(0x6262F75537F762A6), SPH_C64(0xD4D4A33AEEA3D461),
+	SPH_C64(0xA8A832812932A8E5), SPH_C64(0x9696F452C4F496A7),
+	SPH_C64(0xF9F93A629B3AF916), SPH_C64(0xC5C5F6A366F6C552),
+	SPH_C64(0x2525B11035B1256F), SPH_C64(0x595920ABF22059EB),
+	SPH_C64(0x8484AED054AE8491), SPH_C64(0x7272A7C5B7A77296),
+	SPH_C64(0x3939DDECD5DD394B), SPH_C64(0x4C4C61165A614CD4),
+	SPH_C64(0x5E5E3B94CA3B5EE2), SPH_C64(0x7878859FE7857888),
+	SPH_C64(0x3838D8E5DDD83848), SPH_C64(0x8C8C869814868C89),
+	SPH_C64(0xD1D1B217C6B2D16E), SPH_C64(0xA5A50BE4410BA5F2),
+	SPH_C64(0xE2E24DA1434DE23B), SPH_C64(0x6161F84E2FF861A3),
+	SPH_C64(0xB3B34542F145B3C8), SPH_C64(0x2121A53415A52163),
+	SPH_C64(0x9C9CD60894D69CB9), SPH_C64(0x1E1E66EEF0661E22),
+	SPH_C64(0x43435261225243C5), SPH_C64(0xC7C7FCB176FCC754),
+	SPH_C64(0xFCFC2B4FB32BFC19), SPH_C64(0x040414242014040C),
+	SPH_C64(0x515108E3B20851F3), SPH_C64(0x9999C725BCC799B6),
+	SPH_C64(0x6D6DC4224FC46DB7), SPH_C64(0x0D0D396568390D17),
+	SPH_C64(0xFAFA35798335FA13), SPH_C64(0xDFDF8469B684DF7C),
+	SPH_C64(0x7E7E9BA9D79B7E82), SPH_C64(0x2424B4193DB4246C),
+	SPH_C64(0x3B3BD7FEC5D73B4D), SPH_C64(0xABAB3D9A313DABE0),
+	SPH_C64(0xCECED1F03ED1CE4F), SPH_C64(0x1111559988551133),
+	SPH_C64(0x8F8F89830C898F8C), SPH_C64(0x4E4E6B044A6B4ED2),
+	SPH_C64(0xB7B75166D151B7C4), SPH_C64(0xEBEB60E00B60EB20),
+	SPH_C64(0x3C3CCCC1FDCC3C44), SPH_C64(0x8181BFFD7CBF819E),
+	SPH_C64(0x9494FE40D4FE94A1), SPH_C64(0xF7F70C1CEB0CF704),
+	SPH_C64(0xB9B96718A167B9D6), SPH_C64(0x13135F8B985F1335),
+	SPH_C64(0x2C2C9C517D9C2C74), SPH_C64(0xD3D3B805D6B8D368),
+	SPH_C64(0xE7E75C8C6B5CE734), SPH_C64(0x6E6ECB3957CB6EB2),
+	SPH_C64(0xC4C4F3AA6EF3C451), SPH_C64(0x03030F1B180F0305),
+	SPH_C64(0x565613DC8A1356FA), SPH_C64(0x4444495E1A4944CC),
+	SPH_C64(0x7F7F9EA0DF9E7F81), SPH_C64(0xA9A937882137A9E6),
+	SPH_C64(0x2A2A82674D822A7E), SPH_C64(0xBBBB6D0AB16DBBD0),
+	SPH_C64(0xC1C1E28746E2C15E), SPH_C64(0x535302F1A20253F5),
+	SPH_C64(0xDCDC8B72AE8BDC79), SPH_C64(0x0B0B275358270B1D),
+	SPH_C64(0x9D9DD3019CD39DBA), SPH_C64(0x6C6CC12B47C16CB4),
+	SPH_C64(0x3131F5A495F53153), SPH_C64(0x7474B9F387B9749C),
+	SPH_C64(0xF6F60915E309F607), SPH_C64(0x4646434C0A4346CA),
+	SPH_C64(0xACAC26A50926ACE9), SPH_C64(0x898997B53C978986),
+	SPH_C64(0x141444B4A044143C), SPH_C64(0xE1E142BA5B42E13E),
+	SPH_C64(0x16164EA6B04E163A), SPH_C64(0x3A3AD2F7CDD23A4E),
+	SPH_C64(0x6969D0066FD069BB), SPH_C64(0x09092D41482D091B),
+	SPH_C64(0x7070ADD7A7AD7090), SPH_C64(0xB6B6546FD954B6C7),
+	SPH_C64(0xD0D0B71ECEB7D06D), SPH_C64(0xEDED7ED63B7EED2A),
+	SPH_C64(0xCCCCDBE22EDBCC49), SPH_C64(0x424257682A5742C6),
+	SPH_C64(0x9898C22CB4C298B5), SPH_C64(0xA4A40EED490EA4F1),
+	SPH_C64(0x282888755D882878), SPH_C64(0x5C5C3186DA315CE4),
+	SPH_C64(0xF8F83F6B933FF815), SPH_C64(0x8686A4C244A48697)
+};
+
+static const uint64_t old1_T7[256] = {
+	SPH_C64(0x1878D8C078182818), SPH_C64(0x23AF2605AF236523),
+	SPH_C64(0xC6F9B87EF9C657C6), SPH_C64(0xE86FFB136FE825E8),
+	SPH_C64(0x87A1CB4CA1879487), SPH_C64(0xB86211A962B8D5B8),
+	SPH_C64(0x0105090805010301), SPH_C64(0x4F6E0D426E4FD14F),
+	SPH_C64(0x36EE9BADEE365A36), SPH_C64(0xA604FF5904A6F7A6),
+	SPH_C64(0xD2BD0CDEBDD26BD2), SPH_C64(0xF5060EFB06F502F5),
+	SPH_C64(0x798096EF80798B79), SPH_C64(0x6FCE305FCE6FB16F),
+	SPH_C64(0x91EF6DFCEF91AE91), SPH_C64(0x5207F8AA0752F652),
+	SPH_C64(0x60FD4727FD60A060), SPH_C64(0xBC76358976BCD9BC),
+	SPH_C64(0x9BCD37ACCD9BB09B), SPH_C64(0x8E8C8A048C8E8F8E),
+	SPH_C64(0xA315D27115A3F8A3), SPH_C64(0x0C3C6C603C0C140C),
+	SPH_C64(0x7B8A84FF8A7B8D7B), SPH_C64(0x35E180B5E1355F35),
+	SPH_C64(0x1D69F5E8691D271D), SPH_C64(0xE047B35347E03DE0),
+	SPH_C64(0xD7AC21F6ACD764D7), SPH_C64(0xC2ED9C5EEDC25BC2),
+	SPH_C64(0x2E96436D962E722E), SPH_C64(0x4B7A29627A4BDD4B),
+	SPH_C64(0xFE215DA321FE1FFE), SPH_C64(0x5716D5821657F957),
+	SPH_C64(0x1541BDA841153F15), SPH_C64(0x77B6E89FB6779977),
+	SPH_C64(0x37EB92A5EB375937), SPH_C64(0xE5569E7B56E532E5),
+	SPH_C64(0x9FD9138CD99FBC9F), SPH_C64(0xF01723D317F00DF0),
+	SPH_C64(0x4A7F206A7F4ADE4A), SPH_C64(0xDA95449E95DA73DA),
+	SPH_C64(0x5825A2FA2558E858), SPH_C64(0xC9CACF06CAC946C9),
+	SPH_C64(0x298D7C558D297B29), SPH_C64(0x0A225A50220A1E0A),
+	SPH_C64(0xB14F50E14FB1CEB1), SPH_C64(0xA01AC9691AA0FDA0),
+	SPH_C64(0x6BDA147FDA6BBD6B), SPH_C64(0x85ABD95CAB859285),
+	SPH_C64(0xBD733C8173BDDABD), SPH_C64(0x5D348FD2345DE75D),
+	SPH_C64(0x1050908050103010), SPH_C64(0xF40307F303F401F4),
+	SPH_C64(0xCBC0DD16C0CB40CB), SPH_C64(0x3EC6D3EDC63E423E),
+	SPH_C64(0x05112D2811050F05), SPH_C64(0x67E6781FE667A967),
+	SPH_C64(0xE453977353E431E4), SPH_C64(0x27BB0225BB276927),
+	SPH_C64(0x415873325841C341), SPH_C64(0x8B9DA72C9D8B808B),
+	SPH_C64(0xA701F65101A7F4A7), SPH_C64(0x7D94B2CF947D877D),
+	SPH_C64(0x95FB49DCFB95A295), SPH_C64(0xD89F568E9FD875D8),
+	SPH_C64(0xFB30708B30FB10FB), SPH_C64(0xEE71CD2371EE2FEE),
+	SPH_C64(0x7C91BBC7917C847C), SPH_C64(0x66E37117E366AA66),
+	SPH_C64(0xDD8E7BA68EDD7ADD), SPH_C64(0x174BAFB84B173917),
+	SPH_C64(0x474645024647C947), SPH_C64(0x9EDC1A84DC9EBF9E),
+	SPH_C64(0xCAC5D41EC5CA43CA), SPH_C64(0x2D995875992D772D),
+	SPH_C64(0xBF792E9179BFDCBF), SPH_C64(0x071B3F381B070907),
+	SPH_C64(0xAD23AC0123ADEAAD), SPH_C64(0x5A2FB0EA2F5AEE5A),
+	SPH_C64(0x83B5EF6CB5839883), SPH_C64(0x33FFB685FF335533),
+	SPH_C64(0x63F25C3FF263A563), SPH_C64(0x020A12100A020602),
+	SPH_C64(0xAA38933938AAE3AA), SPH_C64(0x71A8DEAFA8719371),
+	SPH_C64(0xC8CFC60ECFC845C8), SPH_C64(0x197DD1C87D192B19),
+	SPH_C64(0x49703B727049DB49), SPH_C64(0xD99A5F869AD976D9),
+	SPH_C64(0xF21D31C31DF20BF2), SPH_C64(0xE348A84B48E338E3),
+	SPH_C64(0x5B2AB9E22A5BED5B), SPH_C64(0x8892BC3492888588),
+	SPH_C64(0x9AC83EA4C89AB39A), SPH_C64(0x26BE0B2DBE266A26),
+	SPH_C64(0x32FABF8DFA325632), SPH_C64(0xB04A59E94AB0CDB0),
+	SPH_C64(0xE96AF21B6AE926E9), SPH_C64(0x0F337778330F110F),
+	SPH_C64(0xD5A633E6A6D562D5), SPH_C64(0x80BAF474BA809D80),
+	SPH_C64(0xBE7C27997CBEDFBE), SPH_C64(0xCDDEEB26DECD4ACD),
+	SPH_C64(0x34E489BDE4345C34), SPH_C64(0x4875327A7548D848),
+	SPH_C64(0xFF2454AB24FF1CFF), SPH_C64(0x7A8F8DF78F7A8E7A),
+	SPH_C64(0x90EA64F4EA90AD90), SPH_C64(0x5F3E9DC23E5FE15F),
+	SPH_C64(0x20A03D1DA0206020), SPH_C64(0x68D50F67D568B868),
+	SPH_C64(0x1A72CAD0721A2E1A), SPH_C64(0xAE2CB7192CAEEFAE),
+	SPH_C64(0xB45E7DC95EB4C1B4), SPH_C64(0x5419CE9A1954FC54),
+	SPH_C64(0x93E57FECE593A893), SPH_C64(0x22AA2F0DAA226622),
+	SPH_C64(0x64E96307E964AC64), SPH_C64(0xF1122ADB12F10EF1),
+	SPH_C64(0x73A2CCBFA2739573), SPH_C64(0x125A82905A123612),
+	SPH_C64(0x405D7A3A5D40C040), SPH_C64(0x0828484028081808),
+	SPH_C64(0xC3E89556E8C358C3), SPH_C64(0xEC7BDF337BEC29EC),
+	SPH_C64(0xDB904D9690DB70DB), SPH_C64(0xA11FC0611FA1FEA1),
+	SPH_C64(0x8D83911C838D8A8D), SPH_C64(0x3DC9C8F5C93D473D),
+	SPH_C64(0x97F15BCCF197A497), SPH_C64(0x0000000000000000),
+	SPH_C64(0xCFD4F936D4CF4CCF), SPH_C64(0x2B876E45872B7D2B),
+	SPH_C64(0x76B3E197B3769A76), SPH_C64(0x82B0E664B0829B82),
+	SPH_C64(0xD6A928FEA9D667D6), SPH_C64(0x1B77C3D8771B2D1B),
+	SPH_C64(0xB55B74C15BB5C2B5), SPH_C64(0xAF29BE1129AFECAF),
+	SPH_C64(0x6ADF1D77DF6ABE6A), SPH_C64(0x500DEABA0D50F050),
+	SPH_C64(0x454C57124C45CF45), SPH_C64(0xF31838CB18F308F3),
+	SPH_C64(0x30F0AD9DF0305030), SPH_C64(0xEF74C42B74EF2CEF),
+	SPH_C64(0x3FC3DAE5C33F413F), SPH_C64(0x551CC7921C55FF55),
+	SPH_C64(0xA210DB7910A2FBA2), SPH_C64(0xEA65E90365EA23EA),
+	SPH_C64(0x65EC6A0FEC65AF65), SPH_C64(0xBA6803B968BAD3BA),
+	SPH_C64(0x2F934A65932F712F), SPH_C64(0xC0E78E4EE7C05DC0),
+	SPH_C64(0xDE8160BE81DE7FDE), SPH_C64(0x1C6CFCE06C1C241C),
+	SPH_C64(0xFD2E46BB2EFD1AFD), SPH_C64(0x4D641F52644DD74D),
+	SPH_C64(0x92E076E4E092AB92), SPH_C64(0x75BCFA8FBC759F75),
+	SPH_C64(0x061E36301E060A06), SPH_C64(0x8A98AE24988A838A),
+	SPH_C64(0xB2404BF940B2CBB2), SPH_C64(0xE659856359E637E6),
+	SPH_C64(0x0E367E70360E120E), SPH_C64(0x1F63E7F8631F211F),
+	SPH_C64(0x62F75537F762A662), SPH_C64(0xD4A33AEEA3D461D4),
+	SPH_C64(0xA832812932A8E5A8), SPH_C64(0x96F452C4F496A796),
+	SPH_C64(0xF93A629B3AF916F9), SPH_C64(0xC5F6A366F6C552C5),
+	SPH_C64(0x25B11035B1256F25), SPH_C64(0x5920ABF22059EB59),
+	SPH_C64(0x84AED054AE849184), SPH_C64(0x72A7C5B7A7729672),
+	SPH_C64(0x39DDECD5DD394B39), SPH_C64(0x4C61165A614CD44C),
+	SPH_C64(0x5E3B94CA3B5EE25E), SPH_C64(0x78859FE785788878),
+	SPH_C64(0x38D8E5DDD8384838), SPH_C64(0x8C869814868C898C),
+	SPH_C64(0xD1B217C6B2D16ED1), SPH_C64(0xA50BE4410BA5F2A5),
+	SPH_C64(0xE24DA1434DE23BE2), SPH_C64(0x61F84E2FF861A361),
+	SPH_C64(0xB34542F145B3C8B3), SPH_C64(0x21A53415A5216321),
+	SPH_C64(0x9CD60894D69CB99C), SPH_C64(0x1E66EEF0661E221E),
+	SPH_C64(0x435261225243C543), SPH_C64(0xC7FCB176FCC754C7),
+	SPH_C64(0xFC2B4FB32BFC19FC), SPH_C64(0x0414242014040C04),
+	SPH_C64(0x5108E3B20851F351), SPH_C64(0x99C725BCC799B699),
+	SPH_C64(0x6DC4224FC46DB76D), SPH_C64(0x0D396568390D170D),
+	SPH_C64(0xFA35798335FA13FA), SPH_C64(0xDF8469B684DF7CDF),
+	SPH_C64(0x7E9BA9D79B7E827E), SPH_C64(0x24B4193DB4246C24),
+	SPH_C64(0x3BD7FEC5D73B4D3B), SPH_C64(0xAB3D9A313DABE0AB),
+	SPH_C64(0xCED1F03ED1CE4FCE), SPH_C64(0x1155998855113311),
+	SPH_C64(0x8F89830C898F8C8F), SPH_C64(0x4E6B044A6B4ED24E),
+	SPH_C64(0xB75166D151B7C4B7), SPH_C64(0xEB60E00B60EB20EB),
+	SPH_C64(0x3CCCC1FDCC3C443C), SPH_C64(0x81BFFD7CBF819E81),
+	SPH_C64(0x94FE40D4FE94A194), SPH_C64(0xF70C1CEB0CF704F7),
+	SPH_C64(0xB96718A167B9D6B9), SPH_C64(0x135F8B985F133513),
+	SPH_C64(0x2C9C517D9C2C742C), SPH_C64(0xD3B805D6B8D368D3),
+	SPH_C64(0xE75C8C6B5CE734E7), SPH_C64(0x6ECB3957CB6EB26E),
+	SPH_C64(0xC4F3AA6EF3C451C4), SPH_C64(0x030F1B180F030503),
+	SPH_C64(0x5613DC8A1356FA56), SPH_C64(0x44495E1A4944CC44),
+	SPH_C64(0x7F9EA0DF9E7F817F), SPH_C64(0xA937882137A9E6A9),
+	SPH_C64(0x2A82674D822A7E2A), SPH_C64(0xBB6D0AB16DBBD0BB),
+	SPH_C64(0xC1E28746E2C15EC1), SPH_C64(0x5302F1A20253F553),
+	SPH_C64(0xDC8B72AE8BDC79DC), SPH_C64(0x0B275358270B1D0B),
+	SPH_C64(0x9DD3019CD39DBA9D), SPH_C64(0x6CC12B47C16CB46C),
+	SPH_C64(0x31F5A495F5315331), SPH_C64(0x74B9F387B9749C74),
+	SPH_C64(0xF60915E309F607F6), SPH_C64(0x46434C0A4346CA46),
+	SPH_C64(0xAC26A50926ACE9AC), SPH_C64(0x8997B53C97898689),
+	SPH_C64(0x1444B4A044143C14), SPH_C64(0xE142BA5B42E13EE1),
+	SPH_C64(0x164EA6B04E163A16), SPH_C64(0x3AD2F7CDD23A4E3A),
+	SPH_C64(0x69D0066FD069BB69), SPH_C64(0x092D41482D091B09),
+	SPH_C64(0x70ADD7A7AD709070), SPH_C64(0xB6546FD954B6C7B6),
+	SPH_C64(0xD0B71ECEB7D06DD0), SPH_C64(0xED7ED63B7EED2AED),
+	SPH_C64(0xCCDBE22EDBCC49CC), SPH_C64(0x4257682A5742C642),
+	SPH_C64(0x98C22CB4C298B598), SPH_C64(0xA40EED490EA4F1A4),
+	SPH_C64(0x2888755D88287828), SPH_C64(0x5C3186DA315CE45C),
+	SPH_C64(0xF83F6B933FF815F8), SPH_C64(0x86A4C244A4869786)
+};
+
+
+
+static const uint64_t old1_RC[10] = {
+	SPH_C64(0x4F01B887E8C62318),
+	SPH_C64(0x52916F79F5D2A636),
+	SPH_C64(0x357B0CA38E9BBC60),
+	SPH_C64(0x57FE4B2EC2D7E01D),
+	SPH_C64(0xDA4AF09FE5377715),
+	SPH_C64(0x856BA0B10A29C958),
+	SPH_C64(0x67053ECBF4105DBD),
+	SPH_C64(0xD8957DA78B4127E4),
+	SPH_C64(0x9E4717DD667CEEFB),
+	SPH_C64(0x33835AAD07BF2DCA)
+};
+
+
+
+
+
+static const uint64_t plain_T0[256] = {
+	SPH_C64(0xD83078C018601818), SPH_C64(0x2646AF05238C2323),
+	SPH_C64(0xB891F97EC63FC6C6), SPH_C64(0xFBCD6F13E887E8E8),
+	SPH_C64(0xCB13A14C87268787), SPH_C64(0x116D62A9B8DAB8B8),
+	SPH_C64(0x0902050801040101), SPH_C64(0x0D9E6E424F214F4F),
+	SPH_C64(0x9B6CEEAD36D83636), SPH_C64(0xFF510459A6A2A6A6),
+	SPH_C64(0x0CB9BDDED26FD2D2), SPH_C64(0x0EF706FBF5F3F5F5),
+	SPH_C64(0x96F280EF79F97979), SPH_C64(0x30DECE5F6FA16F6F),
+	SPH_C64(0x6D3FEFFC917E9191), SPH_C64(0xF8A407AA52555252),
+	SPH_C64(0x47C0FD27609D6060), SPH_C64(0x35657689BCCABCBC),
+	SPH_C64(0x372BCDAC9B569B9B), SPH_C64(0x8A018C048E028E8E),
+	SPH_C64(0xD25B1571A3B6A3A3), SPH_C64(0x6C183C600C300C0C),
+	SPH_C64(0x84F68AFF7BF17B7B), SPH_C64(0x806AE1B535D43535),
+	SPH_C64(0xF53A69E81D741D1D), SPH_C64(0xB3DD4753E0A7E0E0),
+	SPH_C64(0x21B3ACF6D77BD7D7), SPH_C64(0x9C99ED5EC22FC2C2),
+	SPH_C64(0x435C966D2EB82E2E), SPH_C64(0x29967A624B314B4B),
+	SPH_C64(0x5DE121A3FEDFFEFE), SPH_C64(0xD5AE168257415757),
+	SPH_C64(0xBD2A41A815541515), SPH_C64(0xE8EEB69F77C17777),
+	SPH_C64(0x926EEBA537DC3737), SPH_C64(0x9ED7567BE5B3E5E5),
+	SPH_C64(0x1323D98C9F469F9F), SPH_C64(0x23FD17D3F0E7F0F0),
+	SPH_C64(0x20947F6A4A354A4A), SPH_C64(0x44A9959EDA4FDADA),
+	SPH_C64(0xA2B025FA587D5858), SPH_C64(0xCF8FCA06C903C9C9),
+	SPH_C64(0x7C528D5529A42929), SPH_C64(0x5A1422500A280A0A),
+	SPH_C64(0x507F4FE1B1FEB1B1), SPH_C64(0xC95D1A69A0BAA0A0),
+	SPH_C64(0x14D6DA7F6BB16B6B), SPH_C64(0xD917AB5C852E8585),
+	SPH_C64(0x3C677381BDCEBDBD), SPH_C64(0x8FBA34D25D695D5D),
+	SPH_C64(0x9020508010401010), SPH_C64(0x07F503F3F4F7F4F4),
+	SPH_C64(0xDD8BC016CB0BCBCB), SPH_C64(0xD37CC6ED3EF83E3E),
+	SPH_C64(0x2D0A112805140505), SPH_C64(0x78CEE61F67816767),
+	SPH_C64(0x97D55373E4B7E4E4), SPH_C64(0x024EBB25279C2727),
+	SPH_C64(0x7382583241194141), SPH_C64(0xA70B9D2C8B168B8B),
+	SPH_C64(0xF6530151A7A6A7A7), SPH_C64(0xB2FA94CF7DE97D7D),
+	SPH_C64(0x4937FBDC956E9595), SPH_C64(0x56AD9F8ED847D8D8),
+	SPH_C64(0x70EB308BFBCBFBFB), SPH_C64(0xCDC17123EE9FEEEE),
+	SPH_C64(0xBBF891C77CED7C7C), SPH_C64(0x71CCE31766856666),
+	SPH_C64(0x7BA78EA6DD53DDDD), SPH_C64(0xAF2E4BB8175C1717),
+	SPH_C64(0x458E460247014747), SPH_C64(0x1A21DC849E429E9E),
+	SPH_C64(0xD489C51ECA0FCACA), SPH_C64(0x585A99752DB42D2D),
+	SPH_C64(0x2E637991BFC6BFBF), SPH_C64(0x3F0E1B38071C0707),
+	SPH_C64(0xAC472301AD8EADAD), SPH_C64(0xB0B42FEA5A755A5A),
+	SPH_C64(0xEF1BB56C83368383), SPH_C64(0xB666FF8533CC3333),
+	SPH_C64(0x5CC6F23F63916363), SPH_C64(0x12040A1002080202),
+	SPH_C64(0x93493839AA92AAAA), SPH_C64(0xDEE2A8AF71D97171),
+	SPH_C64(0xC68DCF0EC807C8C8), SPH_C64(0xD1327DC819641919),
+	SPH_C64(0x3B92707249394949), SPH_C64(0x5FAF9A86D943D9D9),
+	SPH_C64(0x31F91DC3F2EFF2F2), SPH_C64(0xA8DB484BE3ABE3E3),
+	SPH_C64(0xB9B62AE25B715B5B), SPH_C64(0xBC0D9234881A8888),
+	SPH_C64(0x3E29C8A49A529A9A), SPH_C64(0x0B4CBE2D26982626),
+	SPH_C64(0xBF64FA8D32C83232), SPH_C64(0x597D4AE9B0FAB0B0),
+	SPH_C64(0xF2CF6A1BE983E9E9), SPH_C64(0x771E33780F3C0F0F),
+	SPH_C64(0x33B7A6E6D573D5D5), SPH_C64(0xF41DBA74803A8080),
+	SPH_C64(0x27617C99BEC2BEBE), SPH_C64(0xEB87DE26CD13CDCD),
+	SPH_C64(0x8968E4BD34D03434), SPH_C64(0x3290757A483D4848),
+	SPH_C64(0x54E324ABFFDBFFFF), SPH_C64(0x8DF48FF77AF57A7A),
+	SPH_C64(0x643DEAF4907A9090), SPH_C64(0x9DBE3EC25F615F5F),
+	SPH_C64(0x3D40A01D20802020), SPH_C64(0x0FD0D56768BD6868),
+	SPH_C64(0xCA3472D01A681A1A), SPH_C64(0xB7412C19AE82AEAE),
+	SPH_C64(0x7D755EC9B4EAB4B4), SPH_C64(0xCEA8199A544D5454),
+	SPH_C64(0x7F3BE5EC93769393), SPH_C64(0x2F44AA0D22882222),
+	SPH_C64(0x63C8E907648D6464), SPH_C64(0x2AFF12DBF1E3F1F1),
+	SPH_C64(0xCCE6A2BF73D17373), SPH_C64(0x82245A9012481212),
+	SPH_C64(0x7A805D3A401D4040), SPH_C64(0x4810284008200808),
+	SPH_C64(0x959BE856C32BC3C3), SPH_C64(0xDFC57B33EC97ECEC),
+	SPH_C64(0x4DAB9096DB4BDBDB), SPH_C64(0xC05F1F61A1BEA1A1),
+	SPH_C64(0x9107831C8D0E8D8D), SPH_C64(0xC87AC9F53DF43D3D),
+	SPH_C64(0x5B33F1CC97669797), SPH_C64(0x0000000000000000),
+	SPH_C64(0xF983D436CF1BCFCF), SPH_C64(0x6E5687452BAC2B2B),
+	SPH_C64(0xE1ECB39776C57676), SPH_C64(0xE619B06482328282),
+	SPH_C64(0x28B1A9FED67FD6D6), SPH_C64(0xC33677D81B6C1B1B),
+	SPH_C64(0x74775BC1B5EEB5B5), SPH_C64(0xBE432911AF86AFAF),
+	SPH_C64(0x1DD4DF776AB56A6A), SPH_C64(0xEAA00DBA505D5050),
+	SPH_C64(0x578A4C1245094545), SPH_C64(0x38FB18CBF3EBF3F3),
+	SPH_C64(0xAD60F09D30C03030), SPH_C64(0xC4C3742BEF9BEFEF),
+	SPH_C64(0xDA7EC3E53FFC3F3F), SPH_C64(0xC7AA1C9255495555),
+	SPH_C64(0xDB591079A2B2A2A2), SPH_C64(0xE9C96503EA8FEAEA),
+	SPH_C64(0x6ACAEC0F65896565), SPH_C64(0x036968B9BAD2BABA),
+	SPH_C64(0x4A5E93652FBC2F2F), SPH_C64(0x8E9DE74EC027C0C0),
+	SPH_C64(0x60A181BEDE5FDEDE), SPH_C64(0xFC386CE01C701C1C),
+	SPH_C64(0x46E72EBBFDD3FDFD), SPH_C64(0x1F9A64524D294D4D),
+	SPH_C64(0x7639E0E492729292), SPH_C64(0xFAEABC8F75C97575),
+	SPH_C64(0x360C1E3006180606), SPH_C64(0xAE0998248A128A8A),
+	SPH_C64(0x4B7940F9B2F2B2B2), SPH_C64(0x85D15963E6BFE6E6),
+	SPH_C64(0x7E1C36700E380E0E), SPH_C64(0xE73E63F81F7C1F1F),
+	SPH_C64(0x55C4F73762956262), SPH_C64(0x3AB5A3EED477D4D4),
+	SPH_C64(0x814D3229A89AA8A8), SPH_C64(0x5231F4C496629696),
+	SPH_C64(0x62EF3A9BF9C3F9F9), SPH_C64(0xA397F666C533C5C5),
+	SPH_C64(0x104AB13525942525), SPH_C64(0xABB220F259795959),
+	SPH_C64(0xD015AE54842A8484), SPH_C64(0xC5E4A7B772D57272),
+	SPH_C64(0xEC72DDD539E43939), SPH_C64(0x1698615A4C2D4C4C),
+	SPH_C64(0x94BC3BCA5E655E5E), SPH_C64(0x9FF085E778FD7878),
+	SPH_C64(0xE570D8DD38E03838), SPH_C64(0x980586148C0A8C8C),
+	SPH_C64(0x17BFB2C6D163D1D1), SPH_C64(0xE4570B41A5AEA5A5),
+	SPH_C64(0xA1D94D43E2AFE2E2), SPH_C64(0x4EC2F82F61996161),
+	SPH_C64(0x427B45F1B3F6B3B3), SPH_C64(0x3442A51521842121),
+	SPH_C64(0x0825D6949C4A9C9C), SPH_C64(0xEE3C66F01E781E1E),
+	SPH_C64(0x6186522243114343), SPH_C64(0xB193FC76C73BC7C7),
+	SPH_C64(0x4FE52BB3FCD7FCFC), SPH_C64(0x2408142004100404),
+	SPH_C64(0xE3A208B251595151), SPH_C64(0x252FC7BC995E9999),
+	SPH_C64(0x22DAC44F6DA96D6D), SPH_C64(0x651A39680D340D0D),
+	SPH_C64(0x79E93583FACFFAFA), SPH_C64(0x69A384B6DF5BDFDF),
+	SPH_C64(0xA9FC9BD77EE57E7E), SPH_C64(0x1948B43D24902424),
+	SPH_C64(0xFE76D7C53BEC3B3B), SPH_C64(0x9A4B3D31AB96ABAB),
+	SPH_C64(0xF081D13ECE1FCECE), SPH_C64(0x9922558811441111),
+	SPH_C64(0x8303890C8F068F8F), SPH_C64(0x049C6B4A4E254E4E),
+	SPH_C64(0x667351D1B7E6B7B7), SPH_C64(0xE0CB600BEB8BEBEB),
+	SPH_C64(0xC178CCFD3CF03C3C), SPH_C64(0xFD1FBF7C813E8181),
+	SPH_C64(0x4035FED4946A9494), SPH_C64(0x1CF30CEBF7FBF7F7),
+	SPH_C64(0x186F67A1B9DEB9B9), SPH_C64(0x8B265F98134C1313),
+	SPH_C64(0x51589C7D2CB02C2C), SPH_C64(0x05BBB8D6D36BD3D3),
+	SPH_C64(0x8CD35C6BE7BBE7E7), SPH_C64(0x39DCCB576EA56E6E),
+	SPH_C64(0xAA95F36EC437C4C4), SPH_C64(0x1B060F18030C0303),
+	SPH_C64(0xDCAC138A56455656), SPH_C64(0x5E88491A440D4444),
+	SPH_C64(0xA0FE9EDF7FE17F7F), SPH_C64(0x884F3721A99EA9A9),
+	SPH_C64(0x6754824D2AA82A2A), SPH_C64(0x0A6B6DB1BBD6BBBB),
+	SPH_C64(0x879FE246C123C1C1), SPH_C64(0xF1A602A253515353),
+	SPH_C64(0x72A58BAEDC57DCDC), SPH_C64(0x531627580B2C0B0B),
+	SPH_C64(0x0127D39C9D4E9D9D), SPH_C64(0x2BD8C1476CAD6C6C),
+	SPH_C64(0xA462F59531C43131), SPH_C64(0xF3E8B98774CD7474),
+	SPH_C64(0x15F109E3F6FFF6F6), SPH_C64(0x4C8C430A46054646),
+	SPH_C64(0xA5452609AC8AACAC), SPH_C64(0xB50F973C891E8989),
+	SPH_C64(0xB42844A014501414), SPH_C64(0xBADF425BE1A3E1E1),
+	SPH_C64(0xA62C4EB016581616), SPH_C64(0xF774D2CD3AE83A3A),
+	SPH_C64(0x06D2D06F69B96969), SPH_C64(0x41122D4809240909),
+	SPH_C64(0xD7E0ADA770DD7070), SPH_C64(0x6F7154D9B6E2B6B6),
+	SPH_C64(0x1EBDB7CED067D0D0), SPH_C64(0xD6C77E3BED93EDED),
+	SPH_C64(0xE285DB2ECC17CCCC), SPH_C64(0x6884572A42154242),
+	SPH_C64(0x2C2DC2B4985A9898), SPH_C64(0xED550E49A4AAA4A4),
+	SPH_C64(0x7550885D28A02828), SPH_C64(0x86B831DA5C6D5C5C),
+	SPH_C64(0x6BED3F93F8C7F8F8), SPH_C64(0xC211A44486228686)
+};
+
+static const uint64_t plain_T1[256] = {
+	SPH_C64(0x3078C018601818D8), SPH_C64(0x46AF05238C232326),
+	SPH_C64(0x91F97EC63FC6C6B8), SPH_C64(0xCD6F13E887E8E8FB),
+	SPH_C64(0x13A14C87268787CB), SPH_C64(0x6D62A9B8DAB8B811),
+	SPH_C64(0x0205080104010109), SPH_C64(0x9E6E424F214F4F0D),
+	SPH_C64(0x6CEEAD36D836369B), SPH_C64(0x510459A6A2A6A6FF),
+	SPH_C64(0xB9BDDED26FD2D20C), SPH_C64(0xF706FBF5F3F5F50E),
+	SPH_C64(0xF280EF79F9797996), SPH_C64(0xDECE5F6FA16F6F30),
+	SPH_C64(0x3FEFFC917E91916D), SPH_C64(0xA407AA52555252F8),
+	SPH_C64(0xC0FD27609D606047), SPH_C64(0x657689BCCABCBC35),
+	SPH_C64(0x2BCDAC9B569B9B37), SPH_C64(0x018C048E028E8E8A),
+	SPH_C64(0x5B1571A3B6A3A3D2), SPH_C64(0x183C600C300C0C6C),
+	SPH_C64(0xF68AFF7BF17B7B84), SPH_C64(0x6AE1B535D4353580),
+	SPH_C64(0x3A69E81D741D1DF5), SPH_C64(0xDD4753E0A7E0E0B3),
+	SPH_C64(0xB3ACF6D77BD7D721), SPH_C64(0x99ED5EC22FC2C29C),
+	SPH_C64(0x5C966D2EB82E2E43), SPH_C64(0x967A624B314B4B29),
+	SPH_C64(0xE121A3FEDFFEFE5D), SPH_C64(0xAE168257415757D5),
+	SPH_C64(0x2A41A815541515BD), SPH_C64(0xEEB69F77C17777E8),
+	SPH_C64(0x6EEBA537DC373792), SPH_C64(0xD7567BE5B3E5E59E),
+	SPH_C64(0x23D98C9F469F9F13), SPH_C64(0xFD17D3F0E7F0F023),
+	SPH_C64(0x947F6A4A354A4A20), SPH_C64(0xA9959EDA4FDADA44),
+	SPH_C64(0xB025FA587D5858A2), SPH_C64(0x8FCA06C903C9C9CF),
+	SPH_C64(0x528D5529A429297C), SPH_C64(0x1422500A280A0A5A),
+	SPH_C64(0x7F4FE1B1FEB1B150), SPH_C64(0x5D1A69A0BAA0A0C9),
+	SPH_C64(0xD6DA7F6BB16B6B14), SPH_C64(0x17AB5C852E8585D9),
+	SPH_C64(0x677381BDCEBDBD3C), SPH_C64(0xBA34D25D695D5D8F),
+	SPH_C64(0x2050801040101090), SPH_C64(0xF503F3F4F7F4F407),
+	SPH_C64(0x8BC016CB0BCBCBDD), SPH_C64(0x7CC6ED3EF83E3ED3),
+	SPH_C64(0x0A1128051405052D), SPH_C64(0xCEE61F6781676778),
+	SPH_C64(0xD55373E4B7E4E497), SPH_C64(0x4EBB25279C272702),
+	SPH_C64(0x8258324119414173), SPH_C64(0x0B9D2C8B168B8BA7),
+	SPH_C64(0x530151A7A6A7A7F6), SPH_C64(0xFA94CF7DE97D7DB2),
+	SPH_C64(0x37FBDC956E959549), SPH_C64(0xAD9F8ED847D8D856),
+	SPH_C64(0xEB308BFBCBFBFB70), SPH_C64(0xC17123EE9FEEEECD),
+	SPH_C64(0xF891C77CED7C7CBB), SPH_C64(0xCCE3176685666671),
+	SPH_C64(0xA78EA6DD53DDDD7B), SPH_C64(0x2E4BB8175C1717AF),
+	SPH_C64(0x8E46024701474745), SPH_C64(0x21DC849E429E9E1A),
+	SPH_C64(0x89C51ECA0FCACAD4), SPH_C64(0x5A99752DB42D2D58),
+	SPH_C64(0x637991BFC6BFBF2E), SPH_C64(0x0E1B38071C07073F),
+	SPH_C64(0x472301AD8EADADAC), SPH_C64(0xB42FEA5A755A5AB0),
+	SPH_C64(0x1BB56C83368383EF), SPH_C64(0x66FF8533CC3333B6),
+	SPH_C64(0xC6F23F639163635C), SPH_C64(0x040A100208020212),
+	SPH_C64(0x493839AA92AAAA93), SPH_C64(0xE2A8AF71D97171DE),
+	SPH_C64(0x8DCF0EC807C8C8C6), SPH_C64(0x327DC819641919D1),
+	SPH_C64(0x927072493949493B), SPH_C64(0xAF9A86D943D9D95F),
+	SPH_C64(0xF91DC3F2EFF2F231), SPH_C64(0xDB484BE3ABE3E3A8),
+	SPH_C64(0xB62AE25B715B5BB9), SPH_C64(0x0D9234881A8888BC),
+	SPH_C64(0x29C8A49A529A9A3E), SPH_C64(0x4CBE2D269826260B),
+	SPH_C64(0x64FA8D32C83232BF), SPH_C64(0x7D4AE9B0FAB0B059),
+	SPH_C64(0xCF6A1BE983E9E9F2), SPH_C64(0x1E33780F3C0F0F77),
+	SPH_C64(0xB7A6E6D573D5D533), SPH_C64(0x1DBA74803A8080F4),
+	SPH_C64(0x617C99BEC2BEBE27), SPH_C64(0x87DE26CD13CDCDEB),
+	SPH_C64(0x68E4BD34D0343489), SPH_C64(0x90757A483D484832),
+	SPH_C64(0xE324ABFFDBFFFF54), SPH_C64(0xF48FF77AF57A7A8D),
+	SPH_C64(0x3DEAF4907A909064), SPH_C64(0xBE3EC25F615F5F9D),
+	SPH_C64(0x40A01D208020203D), SPH_C64(0xD0D56768BD68680F),
+	SPH_C64(0x3472D01A681A1ACA), SPH_C64(0x412C19AE82AEAEB7),
+	SPH_C64(0x755EC9B4EAB4B47D), SPH_C64(0xA8199A544D5454CE),
+	SPH_C64(0x3BE5EC937693937F), SPH_C64(0x44AA0D228822222F),
+	SPH_C64(0xC8E907648D646463), SPH_C64(0xFF12DBF1E3F1F12A),
+	SPH_C64(0xE6A2BF73D17373CC), SPH_C64(0x245A901248121282),
+	SPH_C64(0x805D3A401D40407A), SPH_C64(0x1028400820080848),
+	SPH_C64(0x9BE856C32BC3C395), SPH_C64(0xC57B33EC97ECECDF),
+	SPH_C64(0xAB9096DB4BDBDB4D), SPH_C64(0x5F1F61A1BEA1A1C0),
+	SPH_C64(0x07831C8D0E8D8D91), SPH_C64(0x7AC9F53DF43D3DC8),
+	SPH_C64(0x33F1CC976697975B), SPH_C64(0x0000000000000000),
+	SPH_C64(0x83D436CF1BCFCFF9), SPH_C64(0x5687452BAC2B2B6E),
+	SPH_C64(0xECB39776C57676E1), SPH_C64(0x19B06482328282E6),
+	SPH_C64(0xB1A9FED67FD6D628), SPH_C64(0x3677D81B6C1B1BC3),
+	SPH_C64(0x775BC1B5EEB5B574), SPH_C64(0x432911AF86AFAFBE),
+	SPH_C64(0xD4DF776AB56A6A1D), SPH_C64(0xA00DBA505D5050EA),
+	SPH_C64(0x8A4C124509454557), SPH_C64(0xFB18CBF3EBF3F338),
+	SPH_C64(0x60F09D30C03030AD), SPH_C64(0xC3742BEF9BEFEFC4),
+	SPH_C64(0x7EC3E53FFC3F3FDA), SPH_C64(0xAA1C9255495555C7),
+	SPH_C64(0x591079A2B2A2A2DB), SPH_C64(0xC96503EA8FEAEAE9),
+	SPH_C64(0xCAEC0F658965656A), SPH_C64(0x6968B9BAD2BABA03),
+	SPH_C64(0x5E93652FBC2F2F4A), SPH_C64(0x9DE74EC027C0C08E),
+	SPH_C64(0xA181BEDE5FDEDE60), SPH_C64(0x386CE01C701C1CFC),
+	SPH_C64(0xE72EBBFDD3FDFD46), SPH_C64(0x9A64524D294D4D1F),
+	SPH_C64(0x39E0E49272929276), SPH_C64(0xEABC8F75C97575FA),
+	SPH_C64(0x0C1E300618060636), SPH_C64(0x0998248A128A8AAE),
+	SPH_C64(0x7940F9B2F2B2B24B), SPH_C64(0xD15963E6BFE6E685),
+	SPH_C64(0x1C36700E380E0E7E), SPH_C64(0x3E63F81F7C1F1FE7),
+	SPH_C64(0xC4F7376295626255), SPH_C64(0xB5A3EED477D4D43A),
+	SPH_C64(0x4D3229A89AA8A881), SPH_C64(0x31F4C49662969652),
+	SPH_C64(0xEF3A9BF9C3F9F962), SPH_C64(0x97F666C533C5C5A3),
+	SPH_C64(0x4AB1352594252510), SPH_C64(0xB220F259795959AB),
+	SPH_C64(0x15AE54842A8484D0), SPH_C64(0xE4A7B772D57272C5),
+	SPH_C64(0x72DDD539E43939EC), SPH_C64(0x98615A4C2D4C4C16),
+	SPH_C64(0xBC3BCA5E655E5E94), SPH_C64(0xF085E778FD78789F),
+	SPH_C64(0x70D8DD38E03838E5), SPH_C64(0x0586148C0A8C8C98),
+	SPH_C64(0xBFB2C6D163D1D117), SPH_C64(0x570B41A5AEA5A5E4),
+	SPH_C64(0xD94D43E2AFE2E2A1), SPH_C64(0xC2F82F619961614E),
+	SPH_C64(0x7B45F1B3F6B3B342), SPH_C64(0x42A5152184212134),
+	SPH_C64(0x25D6949C4A9C9C08), SPH_C64(0x3C66F01E781E1EEE),
+	SPH_C64(0x8652224311434361), SPH_C64(0x93FC76C73BC7C7B1),
+	SPH_C64(0xE52BB3FCD7FCFC4F), SPH_C64(0x0814200410040424),
+	SPH_C64(0xA208B251595151E3), SPH_C64(0x2FC7BC995E999925),
+	SPH_C64(0xDAC44F6DA96D6D22), SPH_C64(0x1A39680D340D0D65),
+	SPH_C64(0xE93583FACFFAFA79), SPH_C64(0xA384B6DF5BDFDF69),
+	SPH_C64(0xFC9BD77EE57E7EA9), SPH_C64(0x48B43D2490242419),
+	SPH_C64(0x76D7C53BEC3B3BFE), SPH_C64(0x4B3D31AB96ABAB9A),
+	SPH_C64(0x81D13ECE1FCECEF0), SPH_C64(0x2255881144111199),
+	SPH_C64(0x03890C8F068F8F83), SPH_C64(0x9C6B4A4E254E4E04),
+	SPH_C64(0x7351D1B7E6B7B766), SPH_C64(0xCB600BEB8BEBEBE0),
+	SPH_C64(0x78CCFD3CF03C3CC1), SPH_C64(0x1FBF7C813E8181FD),
+	SPH_C64(0x35FED4946A949440), SPH_C64(0xF30CEBF7FBF7F71C),
+	SPH_C64(0x6F67A1B9DEB9B918), SPH_C64(0x265F98134C13138B),
+	SPH_C64(0x589C7D2CB02C2C51), SPH_C64(0xBBB8D6D36BD3D305),
+	SPH_C64(0xD35C6BE7BBE7E78C), SPH_C64(0xDCCB576EA56E6E39),
+	SPH_C64(0x95F36EC437C4C4AA), SPH_C64(0x060F18030C03031B),
+	SPH_C64(0xAC138A56455656DC), SPH_C64(0x88491A440D44445E),
+	SPH_C64(0xFE9EDF7FE17F7FA0), SPH_C64(0x4F3721A99EA9A988),
+	SPH_C64(0x54824D2AA82A2A67), SPH_C64(0x6B6DB1BBD6BBBB0A),
+	SPH_C64(0x9FE246C123C1C187), SPH_C64(0xA602A253515353F1),
+	SPH_C64(0xA58BAEDC57DCDC72), SPH_C64(0x1627580B2C0B0B53),
+	SPH_C64(0x27D39C9D4E9D9D01), SPH_C64(0xD8C1476CAD6C6C2B),
+	SPH_C64(0x62F59531C43131A4), SPH_C64(0xE8B98774CD7474F3),
+	SPH_C64(0xF109E3F6FFF6F615), SPH_C64(0x8C430A460546464C),
+	SPH_C64(0x452609AC8AACACA5), SPH_C64(0x0F973C891E8989B5),
+	SPH_C64(0x2844A014501414B4), SPH_C64(0xDF425BE1A3E1E1BA),
+	SPH_C64(0x2C4EB016581616A6), SPH_C64(0x74D2CD3AE83A3AF7),
+	SPH_C64(0xD2D06F69B9696906), SPH_C64(0x122D480924090941),
+	SPH_C64(0xE0ADA770DD7070D7), SPH_C64(0x7154D9B6E2B6B66F),
+	SPH_C64(0xBDB7CED067D0D01E), SPH_C64(0xC77E3BED93EDEDD6),
+	SPH_C64(0x85DB2ECC17CCCCE2), SPH_C64(0x84572A4215424268),
+	SPH_C64(0x2DC2B4985A98982C), SPH_C64(0x550E49A4AAA4A4ED),
+	SPH_C64(0x50885D28A0282875), SPH_C64(0xB831DA5C6D5C5C86),
+	SPH_C64(0xED3F93F8C7F8F86B), SPH_C64(0x11A44486228686C2)
+};
+
+static const uint64_t plain_T2[256] = {
+	SPH_C64(0x78C018601818D830), SPH_C64(0xAF05238C23232646),
+	SPH_C64(0xF97EC63FC6C6B891), SPH_C64(0x6F13E887E8E8FBCD),
+	SPH_C64(0xA14C87268787CB13), SPH_C64(0x62A9B8DAB8B8116D),
+	SPH_C64(0x0508010401010902), SPH_C64(0x6E424F214F4F0D9E),
+	SPH_C64(0xEEAD36D836369B6C), SPH_C64(0x0459A6A2A6A6FF51),
+	SPH_C64(0xBDDED26FD2D20CB9), SPH_C64(0x06FBF5F3F5F50EF7),
+	SPH_C64(0x80EF79F9797996F2), SPH_C64(0xCE5F6FA16F6F30DE),
+	SPH_C64(0xEFFC917E91916D3F), SPH_C64(0x07AA52555252F8A4),
+	SPH_C64(0xFD27609D606047C0), SPH_C64(0x7689BCCABCBC3565),
+	SPH_C64(0xCDAC9B569B9B372B), SPH_C64(0x8C048E028E8E8A01),
+	SPH_C64(0x1571A3B6A3A3D25B), SPH_C64(0x3C600C300C0C6C18),
+	SPH_C64(0x8AFF7BF17B7B84F6), SPH_C64(0xE1B535D43535806A),
+	SPH_C64(0x69E81D741D1DF53A), SPH_C64(0x4753E0A7E0E0B3DD),
+	SPH_C64(0xACF6D77BD7D721B3), SPH_C64(0xED5EC22FC2C29C99),
+	SPH_C64(0x966D2EB82E2E435C), SPH_C64(0x7A624B314B4B2996),
+	SPH_C64(0x21A3FEDFFEFE5DE1), SPH_C64(0x168257415757D5AE),
+	SPH_C64(0x41A815541515BD2A), SPH_C64(0xB69F77C17777E8EE),
+	SPH_C64(0xEBA537DC3737926E), SPH_C64(0x567BE5B3E5E59ED7),
+	SPH_C64(0xD98C9F469F9F1323), SPH_C64(0x17D3F0E7F0F023FD),
+	SPH_C64(0x7F6A4A354A4A2094), SPH_C64(0x959EDA4FDADA44A9),
+	SPH_C64(0x25FA587D5858A2B0), SPH_C64(0xCA06C903C9C9CF8F),
+	SPH_C64(0x8D5529A429297C52), SPH_C64(0x22500A280A0A5A14),
+	SPH_C64(0x4FE1B1FEB1B1507F), SPH_C64(0x1A69A0BAA0A0C95D),
+	SPH_C64(0xDA7F6BB16B6B14D6), SPH_C64(0xAB5C852E8585D917),
+	SPH_C64(0x7381BDCEBDBD3C67), SPH_C64(0x34D25D695D5D8FBA),
+	SPH_C64(0x5080104010109020), SPH_C64(0x03F3F4F7F4F407F5),
+	SPH_C64(0xC016CB0BCBCBDD8B), SPH_C64(0xC6ED3EF83E3ED37C),
+	SPH_C64(0x1128051405052D0A), SPH_C64(0xE61F6781676778CE),
+	SPH_C64(0x5373E4B7E4E497D5), SPH_C64(0xBB25279C2727024E),
+	SPH_C64(0x5832411941417382), SPH_C64(0x9D2C8B168B8BA70B),
+	SPH_C64(0x0151A7A6A7A7F653), SPH_C64(0x94CF7DE97D7DB2FA),
+	SPH_C64(0xFBDC956E95954937), SPH_C64(0x9F8ED847D8D856AD),
+	SPH_C64(0x308BFBCBFBFB70EB), SPH_C64(0x7123EE9FEEEECDC1),
+	SPH_C64(0x91C77CED7C7CBBF8), SPH_C64(0xE3176685666671CC),
+	SPH_C64(0x8EA6DD53DDDD7BA7), SPH_C64(0x4BB8175C1717AF2E),
+	SPH_C64(0x460247014747458E), SPH_C64(0xDC849E429E9E1A21),
+	SPH_C64(0xC51ECA0FCACAD489), SPH_C64(0x99752DB42D2D585A),
+	SPH_C64(0x7991BFC6BFBF2E63), SPH_C64(0x1B38071C07073F0E),
+	SPH_C64(0x2301AD8EADADAC47), SPH_C64(0x2FEA5A755A5AB0B4),
+	SPH_C64(0xB56C83368383EF1B), SPH_C64(0xFF8533CC3333B666),
+	SPH_C64(0xF23F639163635CC6), SPH_C64(0x0A10020802021204),
+	SPH_C64(0x3839AA92AAAA9349), SPH_C64(0xA8AF71D97171DEE2),
+	SPH_C64(0xCF0EC807C8C8C68D), SPH_C64(0x7DC819641919D132),
+	SPH_C64(0x7072493949493B92), SPH_C64(0x9A86D943D9D95FAF),
+	SPH_C64(0x1DC3F2EFF2F231F9), SPH_C64(0x484BE3ABE3E3A8DB),
+	SPH_C64(0x2AE25B715B5BB9B6), SPH_C64(0x9234881A8888BC0D),
+	SPH_C64(0xC8A49A529A9A3E29), SPH_C64(0xBE2D269826260B4C),
+	SPH_C64(0xFA8D32C83232BF64), SPH_C64(0x4AE9B0FAB0B0597D),
+	SPH_C64(0x6A1BE983E9E9F2CF), SPH_C64(0x33780F3C0F0F771E),
+	SPH_C64(0xA6E6D573D5D533B7), SPH_C64(0xBA74803A8080F41D),
+	SPH_C64(0x7C99BEC2BEBE2761), SPH_C64(0xDE26CD13CDCDEB87),
+	SPH_C64(0xE4BD34D034348968), SPH_C64(0x757A483D48483290),
+	SPH_C64(0x24ABFFDBFFFF54E3), SPH_C64(0x8FF77AF57A7A8DF4),
+	SPH_C64(0xEAF4907A9090643D), SPH_C64(0x3EC25F615F5F9DBE),
+	SPH_C64(0xA01D208020203D40), SPH_C64(0xD56768BD68680FD0),
+	SPH_C64(0x72D01A681A1ACA34), SPH_C64(0x2C19AE82AEAEB741),
+	SPH_C64(0x5EC9B4EAB4B47D75), SPH_C64(0x199A544D5454CEA8),
+	SPH_C64(0xE5EC937693937F3B), SPH_C64(0xAA0D228822222F44),
+	SPH_C64(0xE907648D646463C8), SPH_C64(0x12DBF1E3F1F12AFF),
+	SPH_C64(0xA2BF73D17373CCE6), SPH_C64(0x5A90124812128224),
+	SPH_C64(0x5D3A401D40407A80), SPH_C64(0x2840082008084810),
+	SPH_C64(0xE856C32BC3C3959B), SPH_C64(0x7B33EC97ECECDFC5),
+	SPH_C64(0x9096DB4BDBDB4DAB), SPH_C64(0x1F61A1BEA1A1C05F),
+	SPH_C64(0x831C8D0E8D8D9107), SPH_C64(0xC9F53DF43D3DC87A),
+	SPH_C64(0xF1CC976697975B33), SPH_C64(0x0000000000000000),
+	SPH_C64(0xD436CF1BCFCFF983), SPH_C64(0x87452BAC2B2B6E56),
+	SPH_C64(0xB39776C57676E1EC), SPH_C64(0xB06482328282E619),
+	SPH_C64(0xA9FED67FD6D628B1), SPH_C64(0x77D81B6C1B1BC336),
+	SPH_C64(0x5BC1B5EEB5B57477), SPH_C64(0x2911AF86AFAFBE43),
+	SPH_C64(0xDF776AB56A6A1DD4), SPH_C64(0x0DBA505D5050EAA0),
+	SPH_C64(0x4C1245094545578A), SPH_C64(0x18CBF3EBF3F338FB),
+	SPH_C64(0xF09D30C03030AD60), SPH_C64(0x742BEF9BEFEFC4C3),
+	SPH_C64(0xC3E53FFC3F3FDA7E), SPH_C64(0x1C9255495555C7AA),
+	SPH_C64(0x1079A2B2A2A2DB59), SPH_C64(0x6503EA8FEAEAE9C9),
+	SPH_C64(0xEC0F658965656ACA), SPH_C64(0x68B9BAD2BABA0369),
+	SPH_C64(0x93652FBC2F2F4A5E), SPH_C64(0xE74EC027C0C08E9D),
+	SPH_C64(0x81BEDE5FDEDE60A1), SPH_C64(0x6CE01C701C1CFC38),
+	SPH_C64(0x2EBBFDD3FDFD46E7), SPH_C64(0x64524D294D4D1F9A),
+	SPH_C64(0xE0E4927292927639), SPH_C64(0xBC8F75C97575FAEA),
+	SPH_C64(0x1E3006180606360C), SPH_C64(0x98248A128A8AAE09),
+	SPH_C64(0x40F9B2F2B2B24B79), SPH_C64(0x5963E6BFE6E685D1),
+	SPH_C64(0x36700E380E0E7E1C), SPH_C64(0x63F81F7C1F1FE73E),
+	SPH_C64(0xF7376295626255C4), SPH_C64(0xA3EED477D4D43AB5),
+	SPH_C64(0x3229A89AA8A8814D), SPH_C64(0xF4C4966296965231),
+	SPH_C64(0x3A9BF9C3F9F962EF), SPH_C64(0xF666C533C5C5A397),
+	SPH_C64(0xB13525942525104A), SPH_C64(0x20F259795959ABB2),
+	SPH_C64(0xAE54842A8484D015), SPH_C64(0xA7B772D57272C5E4),
+	SPH_C64(0xDDD539E43939EC72), SPH_C64(0x615A4C2D4C4C1698),
+	SPH_C64(0x3BCA5E655E5E94BC), SPH_C64(0x85E778FD78789FF0),
+	SPH_C64(0xD8DD38E03838E570), SPH_C64(0x86148C0A8C8C9805),
+	SPH_C64(0xB2C6D163D1D117BF), SPH_C64(0x0B41A5AEA5A5E457),
+	SPH_C64(0x4D43E2AFE2E2A1D9), SPH_C64(0xF82F619961614EC2),
+	SPH_C64(0x45F1B3F6B3B3427B), SPH_C64(0xA515218421213442),
+	SPH_C64(0xD6949C4A9C9C0825), SPH_C64(0x66F01E781E1EEE3C),
+	SPH_C64(0x5222431143436186), SPH_C64(0xFC76C73BC7C7B193),
+	SPH_C64(0x2BB3FCD7FCFC4FE5), SPH_C64(0x1420041004042408),
+	SPH_C64(0x08B251595151E3A2), SPH_C64(0xC7BC995E9999252F),
+	SPH_C64(0xC44F6DA96D6D22DA), SPH_C64(0x39680D340D0D651A),
+	SPH_C64(0x3583FACFFAFA79E9), SPH_C64(0x84B6DF5BDFDF69A3),
+	SPH_C64(0x9BD77EE57E7EA9FC), SPH_C64(0xB43D249024241948),
+	SPH_C64(0xD7C53BEC3B3BFE76), SPH_C64(0x3D31AB96ABAB9A4B),
+	SPH_C64(0xD13ECE1FCECEF081), SPH_C64(0x5588114411119922),
+	SPH_C64(0x890C8F068F8F8303), SPH_C64(0x6B4A4E254E4E049C),
+	SPH_C64(0x51D1B7E6B7B76673), SPH_C64(0x600BEB8BEBEBE0CB),
+	SPH_C64(0xCCFD3CF03C3CC178), SPH_C64(0xBF7C813E8181FD1F),
+	SPH_C64(0xFED4946A94944035), SPH_C64(0x0CEBF7FBF7F71CF3),
+	SPH_C64(0x67A1B9DEB9B9186F), SPH_C64(0x5F98134C13138B26),
+	SPH_C64(0x9C7D2CB02C2C5158), SPH_C64(0xB8D6D36BD3D305BB),
+	SPH_C64(0x5C6BE7BBE7E78CD3), SPH_C64(0xCB576EA56E6E39DC),
+	SPH_C64(0xF36EC437C4C4AA95), SPH_C64(0x0F18030C03031B06),
+	SPH_C64(0x138A56455656DCAC), SPH_C64(0x491A440D44445E88),
+	SPH_C64(0x9EDF7FE17F7FA0FE), SPH_C64(0x3721A99EA9A9884F),
+	SPH_C64(0x824D2AA82A2A6754), SPH_C64(0x6DB1BBD6BBBB0A6B),
+	SPH_C64(0xE246C123C1C1879F), SPH_C64(0x02A253515353F1A6),
+	SPH_C64(0x8BAEDC57DCDC72A5), SPH_C64(0x27580B2C0B0B5316),
+	SPH_C64(0xD39C9D4E9D9D0127), SPH_C64(0xC1476CAD6C6C2BD8),
+	SPH_C64(0xF59531C43131A462), SPH_C64(0xB98774CD7474F3E8),
+	SPH_C64(0x09E3F6FFF6F615F1), SPH_C64(0x430A460546464C8C),
+	SPH_C64(0x2609AC8AACACA545), SPH_C64(0x973C891E8989B50F),
+	SPH_C64(0x44A014501414B428), SPH_C64(0x425BE1A3E1E1BADF),
+	SPH_C64(0x4EB016581616A62C), SPH_C64(0xD2CD3AE83A3AF774),
+	SPH_C64(0xD06F69B9696906D2), SPH_C64(0x2D48092409094112),
+	SPH_C64(0xADA770DD7070D7E0), SPH_C64(0x54D9B6E2B6B66F71),
+	SPH_C64(0xB7CED067D0D01EBD), SPH_C64(0x7E3BED93EDEDD6C7),
+	SPH_C64(0xDB2ECC17CCCCE285), SPH_C64(0x572A421542426884),
+	SPH_C64(0xC2B4985A98982C2D), SPH_C64(0x0E49A4AAA4A4ED55),
+	SPH_C64(0x885D28A028287550), SPH_C64(0x31DA5C6D5C5C86B8),
+	SPH_C64(0x3F93F8C7F8F86BED), SPH_C64(0xA44486228686C211)
+};
+
+static const uint64_t plain_T3[256] = {
+	SPH_C64(0xC018601818D83078), SPH_C64(0x05238C23232646AF),
+	SPH_C64(0x7EC63FC6C6B891F9), SPH_C64(0x13E887E8E8FBCD6F),
+	SPH_C64(0x4C87268787CB13A1), SPH_C64(0xA9B8DAB8B8116D62),
+	SPH_C64(0x0801040101090205), SPH_C64(0x424F214F4F0D9E6E),
+	SPH_C64(0xAD36D836369B6CEE), SPH_C64(0x59A6A2A6A6FF5104),
+	SPH_C64(0xDED26FD2D20CB9BD), SPH_C64(0xFBF5F3F5F50EF706),
+	SPH_C64(0xEF79F9797996F280), SPH_C64(0x5F6FA16F6F30DECE),
+	SPH_C64(0xFC917E91916D3FEF), SPH_C64(0xAA52555252F8A407),
+	SPH_C64(0x27609D606047C0FD), SPH_C64(0x89BCCABCBC356576),
+	SPH_C64(0xAC9B569B9B372BCD), SPH_C64(0x048E028E8E8A018C),
+	SPH_C64(0x71A3B6A3A3D25B15), SPH_C64(0x600C300C0C6C183C),
+	SPH_C64(0xFF7BF17B7B84F68A), SPH_C64(0xB535D43535806AE1),
+	SPH_C64(0xE81D741D1DF53A69), SPH_C64(0x53E0A7E0E0B3DD47),
+	SPH_C64(0xF6D77BD7D721B3AC), SPH_C64(0x5EC22FC2C29C99ED),
+	SPH_C64(0x6D2EB82E2E435C96), SPH_C64(0x624B314B4B29967A),
+	SPH_C64(0xA3FEDFFEFE5DE121), SPH_C64(0x8257415757D5AE16),
+	SPH_C64(0xA815541515BD2A41), SPH_C64(0x9F77C17777E8EEB6),
+	SPH_C64(0xA537DC3737926EEB), SPH_C64(0x7BE5B3E5E59ED756),
+	SPH_C64(0x8C9F469F9F1323D9), SPH_C64(0xD3F0E7F0F023FD17),
+	SPH_C64(0x6A4A354A4A20947F), SPH_C64(0x9EDA4FDADA44A995),
+	SPH_C64(0xFA587D5858A2B025), SPH_C64(0x06C903C9C9CF8FCA),
+	SPH_C64(0x5529A429297C528D), SPH_C64(0x500A280A0A5A1422),
+	SPH_C64(0xE1B1FEB1B1507F4F), SPH_C64(0x69A0BAA0A0C95D1A),
+	SPH_C64(0x7F6BB16B6B14D6DA), SPH_C64(0x5C852E8585D917AB),
+	SPH_C64(0x81BDCEBDBD3C6773), SPH_C64(0xD25D695D5D8FBA34),
+	SPH_C64(0x8010401010902050), SPH_C64(0xF3F4F7F4F407F503),
+	SPH_C64(0x16CB0BCBCBDD8BC0), SPH_C64(0xED3EF83E3ED37CC6),
+	SPH_C64(0x28051405052D0A11), SPH_C64(0x1F6781676778CEE6),
+	SPH_C64(0x73E4B7E4E497D553), SPH_C64(0x25279C2727024EBB),
+	SPH_C64(0x3241194141738258), SPH_C64(0x2C8B168B8BA70B9D),
+	SPH_C64(0x51A7A6A7A7F65301), SPH_C64(0xCF7DE97D7DB2FA94),
+	SPH_C64(0xDC956E95954937FB), SPH_C64(0x8ED847D8D856AD9F),
+	SPH_C64(0x8BFBCBFBFB70EB30), SPH_C64(0x23EE9FEEEECDC171),
+	SPH_C64(0xC77CED7C7CBBF891), SPH_C64(0x176685666671CCE3),
+	SPH_C64(0xA6DD53DDDD7BA78E), SPH_C64(0xB8175C1717AF2E4B),
+	SPH_C64(0x0247014747458E46), SPH_C64(0x849E429E9E1A21DC),
+	SPH_C64(0x1ECA0FCACAD489C5), SPH_C64(0x752DB42D2D585A99),
+	SPH_C64(0x91BFC6BFBF2E6379), SPH_C64(0x38071C07073F0E1B),
+	SPH_C64(0x01AD8EADADAC4723), SPH_C64(0xEA5A755A5AB0B42F),
+	SPH_C64(0x6C83368383EF1BB5), SPH_C64(0x8533CC3333B666FF),
+	SPH_C64(0x3F639163635CC6F2), SPH_C64(0x100208020212040A),
+	SPH_C64(0x39AA92AAAA934938), SPH_C64(0xAF71D97171DEE2A8),
+	SPH_C64(0x0EC807C8C8C68DCF), SPH_C64(0xC819641919D1327D),
+	SPH_C64(0x72493949493B9270), SPH_C64(0x86D943D9D95FAF9A),
+	SPH_C64(0xC3F2EFF2F231F91D), SPH_C64(0x4BE3ABE3E3A8DB48),
+	SPH_C64(0xE25B715B5BB9B62A), SPH_C64(0x34881A8888BC0D92),
+	SPH_C64(0xA49A529A9A3E29C8), SPH_C64(0x2D269826260B4CBE),
+	SPH_C64(0x8D32C83232BF64FA), SPH_C64(0xE9B0FAB0B0597D4A),
+	SPH_C64(0x1BE983E9E9F2CF6A), SPH_C64(0x780F3C0F0F771E33),
+	SPH_C64(0xE6D573D5D533B7A6), SPH_C64(0x74803A8080F41DBA),
+	SPH_C64(0x99BEC2BEBE27617C), SPH_C64(0x26CD13CDCDEB87DE),
+	SPH_C64(0xBD34D034348968E4), SPH_C64(0x7A483D4848329075),
+	SPH_C64(0xABFFDBFFFF54E324), SPH_C64(0xF77AF57A7A8DF48F),
+	SPH_C64(0xF4907A9090643DEA), SPH_C64(0xC25F615F5F9DBE3E),
+	SPH_C64(0x1D208020203D40A0), SPH_C64(0x6768BD68680FD0D5),
+	SPH_C64(0xD01A681A1ACA3472), SPH_C64(0x19AE82AEAEB7412C),
+	SPH_C64(0xC9B4EAB4B47D755E), SPH_C64(0x9A544D5454CEA819),
+	SPH_C64(0xEC937693937F3BE5), SPH_C64(0x0D228822222F44AA),
+	SPH_C64(0x07648D646463C8E9), SPH_C64(0xDBF1E3F1F12AFF12),
+	SPH_C64(0xBF73D17373CCE6A2), SPH_C64(0x901248121282245A),
+	SPH_C64(0x3A401D40407A805D), SPH_C64(0x4008200808481028),
+	SPH_C64(0x56C32BC3C3959BE8), SPH_C64(0x33EC97ECECDFC57B),
+	SPH_C64(0x96DB4BDBDB4DAB90), SPH_C64(0x61A1BEA1A1C05F1F),
+	SPH_C64(0x1C8D0E8D8D910783), SPH_C64(0xF53DF43D3DC87AC9),
+	SPH_C64(0xCC976697975B33F1), SPH_C64(0x0000000000000000),
+	SPH_C64(0x36CF1BCFCFF983D4), SPH_C64(0x452BAC2B2B6E5687),
+	SPH_C64(0x9776C57676E1ECB3), SPH_C64(0x6482328282E619B0),
+	SPH_C64(0xFED67FD6D628B1A9), SPH_C64(0xD81B6C1B1BC33677),
+	SPH_C64(0xC1B5EEB5B574775B), SPH_C64(0x11AF86AFAFBE4329),
+	SPH_C64(0x776AB56A6A1DD4DF), SPH_C64(0xBA505D5050EAA00D),
+	SPH_C64(0x1245094545578A4C), SPH_C64(0xCBF3EBF3F338FB18),
+	SPH_C64(0x9D30C03030AD60F0), SPH_C64(0x2BEF9BEFEFC4C374),
+	SPH_C64(0xE53FFC3F3FDA7EC3), SPH_C64(0x9255495555C7AA1C),
+	SPH_C64(0x79A2B2A2A2DB5910), SPH_C64(0x03EA8FEAEAE9C965),
+	SPH_C64(0x0F658965656ACAEC), SPH_C64(0xB9BAD2BABA036968),
+	SPH_C64(0x652FBC2F2F4A5E93), SPH_C64(0x4EC027C0C08E9DE7),
+	SPH_C64(0xBEDE5FDEDE60A181), SPH_C64(0xE01C701C1CFC386C),
+	SPH_C64(0xBBFDD3FDFD46E72E), SPH_C64(0x524D294D4D1F9A64),
+	SPH_C64(0xE4927292927639E0), SPH_C64(0x8F75C97575FAEABC),
+	SPH_C64(0x3006180606360C1E), SPH_C64(0x248A128A8AAE0998),
+	SPH_C64(0xF9B2F2B2B24B7940), SPH_C64(0x63E6BFE6E685D159),
+	SPH_C64(0x700E380E0E7E1C36), SPH_C64(0xF81F7C1F1FE73E63),
+	SPH_C64(0x376295626255C4F7), SPH_C64(0xEED477D4D43AB5A3),
+	SPH_C64(0x29A89AA8A8814D32), SPH_C64(0xC4966296965231F4),
+	SPH_C64(0x9BF9C3F9F962EF3A), SPH_C64(0x66C533C5C5A397F6),
+	SPH_C64(0x3525942525104AB1), SPH_C64(0xF259795959ABB220),
+	SPH_C64(0x54842A8484D015AE), SPH_C64(0xB772D57272C5E4A7),
+	SPH_C64(0xD539E43939EC72DD), SPH_C64(0x5A4C2D4C4C169861),
+	SPH_C64(0xCA5E655E5E94BC3B), SPH_C64(0xE778FD78789FF085),
+	SPH_C64(0xDD38E03838E570D8), SPH_C64(0x148C0A8C8C980586),
+	SPH_C64(0xC6D163D1D117BFB2), SPH_C64(0x41A5AEA5A5E4570B),
+	SPH_C64(0x43E2AFE2E2A1D94D), SPH_C64(0x2F619961614EC2F8),
+	SPH_C64(0xF1B3F6B3B3427B45), SPH_C64(0x15218421213442A5),
+	SPH_C64(0x949C4A9C9C0825D6), SPH_C64(0xF01E781E1EEE3C66),
+	SPH_C64(0x2243114343618652), SPH_C64(0x76C73BC7C7B193FC),
+	SPH_C64(0xB3FCD7FCFC4FE52B), SPH_C64(0x2004100404240814),
+	SPH_C64(0xB251595151E3A208), SPH_C64(0xBC995E9999252FC7),
+	SPH_C64(0x4F6DA96D6D22DAC4), SPH_C64(0x680D340D0D651A39),
+	SPH_C64(0x83FACFFAFA79E935), SPH_C64(0xB6DF5BDFDF69A384),
+	SPH_C64(0xD77EE57E7EA9FC9B), SPH_C64(0x3D249024241948B4),
+	SPH_C64(0xC53BEC3B3BFE76D7), SPH_C64(0x31AB96ABAB9A4B3D),
+	SPH_C64(0x3ECE1FCECEF081D1), SPH_C64(0x8811441111992255),
+	SPH_C64(0x0C8F068F8F830389), SPH_C64(0x4A4E254E4E049C6B),
+	SPH_C64(0xD1B7E6B7B7667351), SPH_C64(0x0BEB8BEBEBE0CB60),
+	SPH_C64(0xFD3CF03C3CC178CC), SPH_C64(0x7C813E8181FD1FBF),
+	SPH_C64(0xD4946A94944035FE), SPH_C64(0xEBF7FBF7F71CF30C),
+	SPH_C64(0xA1B9DEB9B9186F67), SPH_C64(0x98134C13138B265F),
+	SPH_C64(0x7D2CB02C2C51589C), SPH_C64(0xD6D36BD3D305BBB8),
+	SPH_C64(0x6BE7BBE7E78CD35C), SPH_C64(0x576EA56E6E39DCCB),
+	SPH_C64(0x6EC437C4C4AA95F3), SPH_C64(0x18030C03031B060F),
+	SPH_C64(0x8A56455656DCAC13), SPH_C64(0x1A440D44445E8849),
+	SPH_C64(0xDF7FE17F7FA0FE9E), SPH_C64(0x21A99EA9A9884F37),
+	SPH_C64(0x4D2AA82A2A675482), SPH_C64(0xB1BBD6BBBB0A6B6D),
+	SPH_C64(0x46C123C1C1879FE2), SPH_C64(0xA253515353F1A602),
+	SPH_C64(0xAEDC57DCDC72A58B), SPH_C64(0x580B2C0B0B531627),
+	SPH_C64(0x9C9D4E9D9D0127D3), SPH_C64(0x476CAD6C6C2BD8C1),
+	SPH_C64(0x9531C43131A462F5), SPH_C64(0x8774CD7474F3E8B9),
+	SPH_C64(0xE3F6FFF6F615F109), SPH_C64(0x0A460546464C8C43),
+	SPH_C64(0x09AC8AACACA54526), SPH_C64(0x3C891E8989B50F97),
+	SPH_C64(0xA014501414B42844), SPH_C64(0x5BE1A3E1E1BADF42),
+	SPH_C64(0xB016581616A62C4E), SPH_C64(0xCD3AE83A3AF774D2),
+	SPH_C64(0x6F69B9696906D2D0), SPH_C64(0x480924090941122D),
+	SPH_C64(0xA770DD7070D7E0AD), SPH_C64(0xD9B6E2B6B66F7154),
+	SPH_C64(0xCED067D0D01EBDB7), SPH_C64(0x3BED93EDEDD6C77E),
+	SPH_C64(0x2ECC17CCCCE285DB), SPH_C64(0x2A42154242688457),
+	SPH_C64(0xB4985A98982C2DC2), SPH_C64(0x49A4AAA4A4ED550E),
+	SPH_C64(0x5D28A02828755088), SPH_C64(0xDA5C6D5C5C86B831),
+	SPH_C64(0x93F8C7F8F86BED3F), SPH_C64(0x4486228686C211A4)
+};
+
+static const uint64_t plain_T4[256] = {
+	SPH_C64(0x18601818D83078C0), SPH_C64(0x238C23232646AF05),
+	SPH_C64(0xC63FC6C6B891F97E), SPH_C64(0xE887E8E8FBCD6F13),
+	SPH_C64(0x87268787CB13A14C), SPH_C64(0xB8DAB8B8116D62A9),
+	SPH_C64(0x0104010109020508), SPH_C64(0x4F214F4F0D9E6E42),
+	SPH_C64(0x36D836369B6CEEAD), SPH_C64(0xA6A2A6A6FF510459),
+	SPH_C64(0xD26FD2D20CB9BDDE), SPH_C64(0xF5F3F5F50EF706FB),
+	SPH_C64(0x79F9797996F280EF), SPH_C64(0x6FA16F6F30DECE5F),
+	SPH_C64(0x917E91916D3FEFFC), SPH_C64(0x52555252F8A407AA),
+	SPH_C64(0x609D606047C0FD27), SPH_C64(0xBCCABCBC35657689),
+	SPH_C64(0x9B569B9B372BCDAC), SPH_C64(0x8E028E8E8A018C04),
+	SPH_C64(0xA3B6A3A3D25B1571), SPH_C64(0x0C300C0C6C183C60),
+	SPH_C64(0x7BF17B7B84F68AFF), SPH_C64(0x35D43535806AE1B5),
+	SPH_C64(0x1D741D1DF53A69E8), SPH_C64(0xE0A7E0E0B3DD4753),
+	SPH_C64(0xD77BD7D721B3ACF6), SPH_C64(0xC22FC2C29C99ED5E),
+	SPH_C64(0x2EB82E2E435C966D), SPH_C64(0x4B314B4B29967A62),
+	SPH_C64(0xFEDFFEFE5DE121A3), SPH_C64(0x57415757D5AE1682),
+	SPH_C64(0x15541515BD2A41A8), SPH_C64(0x77C17777E8EEB69F),
+	SPH_C64(0x37DC3737926EEBA5), SPH_C64(0xE5B3E5E59ED7567B),
+	SPH_C64(0x9F469F9F1323D98C), SPH_C64(0xF0E7F0F023FD17D3),
+	SPH_C64(0x4A354A4A20947F6A), SPH_C64(0xDA4FDADA44A9959E),
+	SPH_C64(0x587D5858A2B025FA), SPH_C64(0xC903C9C9CF8FCA06),
+	SPH_C64(0x29A429297C528D55), SPH_C64(0x0A280A0A5A142250),
+	SPH_C64(0xB1FEB1B1507F4FE1), SPH_C64(0xA0BAA0A0C95D1A69),
+	SPH_C64(0x6BB16B6B14D6DA7F), SPH_C64(0x852E8585D917AB5C),
+	SPH_C64(0xBDCEBDBD3C677381), SPH_C64(0x5D695D5D8FBA34D2),
+	SPH_C64(0x1040101090205080), SPH_C64(0xF4F7F4F407F503F3),
+	SPH_C64(0xCB0BCBCBDD8BC016), SPH_C64(0x3EF83E3ED37CC6ED),
+	SPH_C64(0x051405052D0A1128), SPH_C64(0x6781676778CEE61F),
+	SPH_C64(0xE4B7E4E497D55373), SPH_C64(0x279C2727024EBB25),
+	SPH_C64(0x4119414173825832), SPH_C64(0x8B168B8BA70B9D2C),
+	SPH_C64(0xA7A6A7A7F6530151), SPH_C64(0x7DE97D7DB2FA94CF),
+	SPH_C64(0x956E95954937FBDC), SPH_C64(0xD847D8D856AD9F8E),
+	SPH_C64(0xFBCBFBFB70EB308B), SPH_C64(0xEE9FEEEECDC17123),
+	SPH_C64(0x7CED7C7CBBF891C7), SPH_C64(0x6685666671CCE317),
+	SPH_C64(0xDD53DDDD7BA78EA6), SPH_C64(0x175C1717AF2E4BB8),
+	SPH_C64(0x47014747458E4602), SPH_C64(0x9E429E9E1A21DC84),
+	SPH_C64(0xCA0FCACAD489C51E), SPH_C64(0x2DB42D2D585A9975),
+	SPH_C64(0xBFC6BFBF2E637991), SPH_C64(0x071C07073F0E1B38),
+	SPH_C64(0xAD8EADADAC472301), SPH_C64(0x5A755A5AB0B42FEA),
+	SPH_C64(0x83368383EF1BB56C), SPH_C64(0x33CC3333B666FF85),
+	SPH_C64(0x639163635CC6F23F), SPH_C64(0x0208020212040A10),
+	SPH_C64(0xAA92AAAA93493839), SPH_C64(0x71D97171DEE2A8AF),
+	SPH_C64(0xC807C8C8C68DCF0E), SPH_C64(0x19641919D1327DC8),
+	SPH_C64(0x493949493B927072), SPH_C64(0xD943D9D95FAF9A86),
+	SPH_C64(0xF2EFF2F231F91DC3), SPH_C64(0xE3ABE3E3A8DB484B),
+	SPH_C64(0x5B715B5BB9B62AE2), SPH_C64(0x881A8888BC0D9234),
+	SPH_C64(0x9A529A9A3E29C8A4), SPH_C64(0x269826260B4CBE2D),
+	SPH_C64(0x32C83232BF64FA8D), SPH_C64(0xB0FAB0B0597D4AE9),
+	SPH_C64(0xE983E9E9F2CF6A1B), SPH_C64(0x0F3C0F0F771E3378),
+	SPH_C64(0xD573D5D533B7A6E6), SPH_C64(0x803A8080F41DBA74),
+	SPH_C64(0xBEC2BEBE27617C99), SPH_C64(0xCD13CDCDEB87DE26),
+	SPH_C64(0x34D034348968E4BD), SPH_C64(0x483D48483290757A),
+	SPH_C64(0xFFDBFFFF54E324AB), SPH_C64(0x7AF57A7A8DF48FF7),
+	SPH_C64(0x907A9090643DEAF4), SPH_C64(0x5F615F5F9DBE3EC2),
+	SPH_C64(0x208020203D40A01D), SPH_C64(0x68BD68680FD0D567),
+	SPH_C64(0x1A681A1ACA3472D0), SPH_C64(0xAE82AEAEB7412C19),
+	SPH_C64(0xB4EAB4B47D755EC9), SPH_C64(0x544D5454CEA8199A),
+	SPH_C64(0x937693937F3BE5EC), SPH_C64(0x228822222F44AA0D),
+	SPH_C64(0x648D646463C8E907), SPH_C64(0xF1E3F1F12AFF12DB),
+	SPH_C64(0x73D17373CCE6A2BF), SPH_C64(0x1248121282245A90),
+	SPH_C64(0x401D40407A805D3A), SPH_C64(0x0820080848102840),
+	SPH_C64(0xC32BC3C3959BE856), SPH_C64(0xEC97ECECDFC57B33),
+	SPH_C64(0xDB4BDBDB4DAB9096), SPH_C64(0xA1BEA1A1C05F1F61),
+	SPH_C64(0x8D0E8D8D9107831C), SPH_C64(0x3DF43D3DC87AC9F5),
+	SPH_C64(0x976697975B33F1CC), SPH_C64(0x0000000000000000),
+	SPH_C64(0xCF1BCFCFF983D436), SPH_C64(0x2BAC2B2B6E568745),
+	SPH_C64(0x76C57676E1ECB397), SPH_C64(0x82328282E619B064),
+	SPH_C64(0xD67FD6D628B1A9FE), SPH_C64(0x1B6C1B1BC33677D8),
+	SPH_C64(0xB5EEB5B574775BC1), SPH_C64(0xAF86AFAFBE432911),
+	SPH_C64(0x6AB56A6A1DD4DF77), SPH_C64(0x505D5050EAA00DBA),
+	SPH_C64(0x45094545578A4C12), SPH_C64(0xF3EBF3F338FB18CB),
+	SPH_C64(0x30C03030AD60F09D), SPH_C64(0xEF9BEFEFC4C3742B),
+	SPH_C64(0x3FFC3F3FDA7EC3E5), SPH_C64(0x55495555C7AA1C92),
+	SPH_C64(0xA2B2A2A2DB591079), SPH_C64(0xEA8FEAEAE9C96503),
+	SPH_C64(0x658965656ACAEC0F), SPH_C64(0xBAD2BABA036968B9),
+	SPH_C64(0x2FBC2F2F4A5E9365), SPH_C64(0xC027C0C08E9DE74E),
+	SPH_C64(0xDE5FDEDE60A181BE), SPH_C64(0x1C701C1CFC386CE0),
+	SPH_C64(0xFDD3FDFD46E72EBB), SPH_C64(0x4D294D4D1F9A6452),
+	SPH_C64(0x927292927639E0E4), SPH_C64(0x75C97575FAEABC8F),
+	SPH_C64(0x06180606360C1E30), SPH_C64(0x8A128A8AAE099824),
+	SPH_C64(0xB2F2B2B24B7940F9), SPH_C64(0xE6BFE6E685D15963),
+	SPH_C64(0x0E380E0E7E1C3670), SPH_C64(0x1F7C1F1FE73E63F8),
+	SPH_C64(0x6295626255C4F737), SPH_C64(0xD477D4D43AB5A3EE),
+	SPH_C64(0xA89AA8A8814D3229), SPH_C64(0x966296965231F4C4),
+	SPH_C64(0xF9C3F9F962EF3A9B), SPH_C64(0xC533C5C5A397F666),
+	SPH_C64(0x25942525104AB135), SPH_C64(0x59795959ABB220F2),
+	SPH_C64(0x842A8484D015AE54), SPH_C64(0x72D57272C5E4A7B7),
+	SPH_C64(0x39E43939EC72DDD5), SPH_C64(0x4C2D4C4C1698615A),
+	SPH_C64(0x5E655E5E94BC3BCA), SPH_C64(0x78FD78789FF085E7),
+	SPH_C64(0x38E03838E570D8DD), SPH_C64(0x8C0A8C8C98058614),
+	SPH_C64(0xD163D1D117BFB2C6), SPH_C64(0xA5AEA5A5E4570B41),
+	SPH_C64(0xE2AFE2E2A1D94D43), SPH_C64(0x619961614EC2F82F),
+	SPH_C64(0xB3F6B3B3427B45F1), SPH_C64(0x218421213442A515),
+	SPH_C64(0x9C4A9C9C0825D694), SPH_C64(0x1E781E1EEE3C66F0),
+	SPH_C64(0x4311434361865222), SPH_C64(0xC73BC7C7B193FC76),
+	SPH_C64(0xFCD7FCFC4FE52BB3), SPH_C64(0x0410040424081420),
+	SPH_C64(0x51595151E3A208B2), SPH_C64(0x995E9999252FC7BC),
+	SPH_C64(0x6DA96D6D22DAC44F), SPH_C64(0x0D340D0D651A3968),
+	SPH_C64(0xFACFFAFA79E93583), SPH_C64(0xDF5BDFDF69A384B6),
+	SPH_C64(0x7EE57E7EA9FC9BD7), SPH_C64(0x249024241948B43D),
+	SPH_C64(0x3BEC3B3BFE76D7C5), SPH_C64(0xAB96ABAB9A4B3D31),
+	SPH_C64(0xCE1FCECEF081D13E), SPH_C64(0x1144111199225588),
+	SPH_C64(0x8F068F8F8303890C), SPH_C64(0x4E254E4E049C6B4A),
+	SPH_C64(0xB7E6B7B7667351D1), SPH_C64(0xEB8BEBEBE0CB600B),
+	SPH_C64(0x3CF03C3CC178CCFD), SPH_C64(0x813E8181FD1FBF7C),
+	SPH_C64(0x946A94944035FED4), SPH_C64(0xF7FBF7F71CF30CEB),
+	SPH_C64(0xB9DEB9B9186F67A1), SPH_C64(0x134C13138B265F98),
+	SPH_C64(0x2CB02C2C51589C7D), SPH_C64(0xD36BD3D305BBB8D6),
+	SPH_C64(0xE7BBE7E78CD35C6B), SPH_C64(0x6EA56E6E39DCCB57),
+	SPH_C64(0xC437C4C4AA95F36E), SPH_C64(0x030C03031B060F18),
+	SPH_C64(0x56455656DCAC138A), SPH_C64(0x440D44445E88491A),
+	SPH_C64(0x7FE17F7FA0FE9EDF), SPH_C64(0xA99EA9A9884F3721),
+	SPH_C64(0x2AA82A2A6754824D), SPH_C64(0xBBD6BBBB0A6B6DB1),
+	SPH_C64(0xC123C1C1879FE246), SPH_C64(0x53515353F1A602A2),
+	SPH_C64(0xDC57DCDC72A58BAE), SPH_C64(0x0B2C0B0B53162758),
+	SPH_C64(0x9D4E9D9D0127D39C), SPH_C64(0x6CAD6C6C2BD8C147),
+	SPH_C64(0x31C43131A462F595), SPH_C64(0x74CD7474F3E8B987),
+	SPH_C64(0xF6FFF6F615F109E3), SPH_C64(0x460546464C8C430A),
+	SPH_C64(0xAC8AACACA5452609), SPH_C64(0x891E8989B50F973C),
+	SPH_C64(0x14501414B42844A0), SPH_C64(0xE1A3E1E1BADF425B),
+	SPH_C64(0x16581616A62C4EB0), SPH_C64(0x3AE83A3AF774D2CD),
+	SPH_C64(0x69B9696906D2D06F), SPH_C64(0x0924090941122D48),
+	SPH_C64(0x70DD7070D7E0ADA7), SPH_C64(0xB6E2B6B66F7154D9),
+	SPH_C64(0xD067D0D01EBDB7CE), SPH_C64(0xED93EDEDD6C77E3B),
+	SPH_C64(0xCC17CCCCE285DB2E), SPH_C64(0x421542426884572A),
+	SPH_C64(0x985A98982C2DC2B4), SPH_C64(0xA4AAA4A4ED550E49),
+	SPH_C64(0x28A028287550885D), SPH_C64(0x5C6D5C5C86B831DA),
+	SPH_C64(0xF8C7F8F86BED3F93), SPH_C64(0x86228686C211A444)
+};
+
+static const uint64_t plain_T5[256] = {
+	SPH_C64(0x601818D83078C018), SPH_C64(0x8C23232646AF0523),
+	SPH_C64(0x3FC6C6B891F97EC6), SPH_C64(0x87E8E8FBCD6F13E8),
+	SPH_C64(0x268787CB13A14C87), SPH_C64(0xDAB8B8116D62A9B8),
+	SPH_C64(0x0401010902050801), SPH_C64(0x214F4F0D9E6E424F),
+	SPH_C64(0xD836369B6CEEAD36), SPH_C64(0xA2A6A6FF510459A6),
+	SPH_C64(0x6FD2D20CB9BDDED2), SPH_C64(0xF3F5F50EF706FBF5),
+	SPH_C64(0xF9797996F280EF79), SPH_C64(0xA16F6F30DECE5F6F),
+	SPH_C64(0x7E91916D3FEFFC91), SPH_C64(0x555252F8A407AA52),
+	SPH_C64(0x9D606047C0FD2760), SPH_C64(0xCABCBC35657689BC),
+	SPH_C64(0x569B9B372BCDAC9B), SPH_C64(0x028E8E8A018C048E),
+	SPH_C64(0xB6A3A3D25B1571A3), SPH_C64(0x300C0C6C183C600C),
+	SPH_C64(0xF17B7B84F68AFF7B), SPH_C64(0xD43535806AE1B535),
+	SPH_C64(0x741D1DF53A69E81D), SPH_C64(0xA7E0E0B3DD4753E0),
+	SPH_C64(0x7BD7D721B3ACF6D7), SPH_C64(0x2FC2C29C99ED5EC2),
+	SPH_C64(0xB82E2E435C966D2E), SPH_C64(0x314B4B29967A624B),
+	SPH_C64(0xDFFEFE5DE121A3FE), SPH_C64(0x415757D5AE168257),
+	SPH_C64(0x541515BD2A41A815), SPH_C64(0xC17777E8EEB69F77),
+	SPH_C64(0xDC3737926EEBA537), SPH_C64(0xB3E5E59ED7567BE5),
+	SPH_C64(0x469F9F1323D98C9F), SPH_C64(0xE7F0F023FD17D3F0),
+	SPH_C64(0x354A4A20947F6A4A), SPH_C64(0x4FDADA44A9959EDA),
+	SPH_C64(0x7D5858A2B025FA58), SPH_C64(0x03C9C9CF8FCA06C9),
+	SPH_C64(0xA429297C528D5529), SPH_C64(0x280A0A5A1422500A),
+	SPH_C64(0xFEB1B1507F4FE1B1), SPH_C64(0xBAA0A0C95D1A69A0),
+	SPH_C64(0xB16B6B14D6DA7F6B), SPH_C64(0x2E8585D917AB5C85),
+	SPH_C64(0xCEBDBD3C677381BD), SPH_C64(0x695D5D8FBA34D25D),
+	SPH_C64(0x4010109020508010), SPH_C64(0xF7F4F407F503F3F4),
+	SPH_C64(0x0BCBCBDD8BC016CB), SPH_C64(0xF83E3ED37CC6ED3E),
+	SPH_C64(0x1405052D0A112805), SPH_C64(0x81676778CEE61F67),
+	SPH_C64(0xB7E4E497D55373E4), SPH_C64(0x9C2727024EBB2527),
+	SPH_C64(0x1941417382583241), SPH_C64(0x168B8BA70B9D2C8B),
+	SPH_C64(0xA6A7A7F6530151A7), SPH_C64(0xE97D7DB2FA94CF7D),
+	SPH_C64(0x6E95954937FBDC95), SPH_C64(0x47D8D856AD9F8ED8),
+	SPH_C64(0xCBFBFB70EB308BFB), SPH_C64(0x9FEEEECDC17123EE),
+	SPH_C64(0xED7C7CBBF891C77C), SPH_C64(0x85666671CCE31766),
+	SPH_C64(0x53DDDD7BA78EA6DD), SPH_C64(0x5C1717AF2E4BB817),
+	SPH_C64(0x014747458E460247), SPH_C64(0x429E9E1A21DC849E),
+	SPH_C64(0x0FCACAD489C51ECA), SPH_C64(0xB42D2D585A99752D),
+	SPH_C64(0xC6BFBF2E637991BF), SPH_C64(0x1C07073F0E1B3807),
+	SPH_C64(0x8EADADAC472301AD), SPH_C64(0x755A5AB0B42FEA5A),
+	SPH_C64(0x368383EF1BB56C83), SPH_C64(0xCC3333B666FF8533),
+	SPH_C64(0x9163635CC6F23F63), SPH_C64(0x08020212040A1002),
+	SPH_C64(0x92AAAA93493839AA), SPH_C64(0xD97171DEE2A8AF71),
+	SPH_C64(0x07C8C8C68DCF0EC8), SPH_C64(0x641919D1327DC819),
+	SPH_C64(0x3949493B92707249), SPH_C64(0x43D9D95FAF9A86D9),
+	SPH_C64(0xEFF2F231F91DC3F2), SPH_C64(0xABE3E3A8DB484BE3),
+	SPH_C64(0x715B5BB9B62AE25B), SPH_C64(0x1A8888BC0D923488),
+	SPH_C64(0x529A9A3E29C8A49A), SPH_C64(0x9826260B4CBE2D26),
+	SPH_C64(0xC83232BF64FA8D32), SPH_C64(0xFAB0B0597D4AE9B0),
+	SPH_C64(0x83E9E9F2CF6A1BE9), SPH_C64(0x3C0F0F771E33780F),
+	SPH_C64(0x73D5D533B7A6E6D5), SPH_C64(0x3A8080F41DBA7480),
+	SPH_C64(0xC2BEBE27617C99BE), SPH_C64(0x13CDCDEB87DE26CD),
+	SPH_C64(0xD034348968E4BD34), SPH_C64(0x3D48483290757A48),
+	SPH_C64(0xDBFFFF54E324ABFF), SPH_C64(0xF57A7A8DF48FF77A),
+	SPH_C64(0x7A9090643DEAF490), SPH_C64(0x615F5F9DBE3EC25F),
+	SPH_C64(0x8020203D40A01D20), SPH_C64(0xBD68680FD0D56768),
+	SPH_C64(0x681A1ACA3472D01A), SPH_C64(0x82AEAEB7412C19AE),
+	SPH_C64(0xEAB4B47D755EC9B4), SPH_C64(0x4D5454CEA8199A54),
+	SPH_C64(0x7693937F3BE5EC93), SPH_C64(0x8822222F44AA0D22),
+	SPH_C64(0x8D646463C8E90764), SPH_C64(0xE3F1F12AFF12DBF1),
+	SPH_C64(0xD17373CCE6A2BF73), SPH_C64(0x48121282245A9012),
+	SPH_C64(0x1D40407A805D3A40), SPH_C64(0x2008084810284008),
+	SPH_C64(0x2BC3C3959BE856C3), SPH_C64(0x97ECECDFC57B33EC),
+	SPH_C64(0x4BDBDB4DAB9096DB), SPH_C64(0xBEA1A1C05F1F61A1),
+	SPH_C64(0x0E8D8D9107831C8D), SPH_C64(0xF43D3DC87AC9F53D),
+	SPH_C64(0x6697975B33F1CC97), SPH_C64(0x0000000000000000),
+	SPH_C64(0x1BCFCFF983D436CF), SPH_C64(0xAC2B2B6E5687452B),
+	SPH_C64(0xC57676E1ECB39776), SPH_C64(0x328282E619B06482),
+	SPH_C64(0x7FD6D628B1A9FED6), SPH_C64(0x6C1B1BC33677D81B),
+	SPH_C64(0xEEB5B574775BC1B5), SPH_C64(0x86AFAFBE432911AF),
+	SPH_C64(0xB56A6A1DD4DF776A), SPH_C64(0x5D5050EAA00DBA50),
+	SPH_C64(0x094545578A4C1245), SPH_C64(0xEBF3F338FB18CBF3),
+	SPH_C64(0xC03030AD60F09D30), SPH_C64(0x9BEFEFC4C3742BEF),
+	SPH_C64(0xFC3F3FDA7EC3E53F), SPH_C64(0x495555C7AA1C9255),
+	SPH_C64(0xB2A2A2DB591079A2), SPH_C64(0x8FEAEAE9C96503EA),
+	SPH_C64(0x8965656ACAEC0F65), SPH_C64(0xD2BABA036968B9BA),
+	SPH_C64(0xBC2F2F4A5E93652F), SPH_C64(0x27C0C08E9DE74EC0),
+	SPH_C64(0x5FDEDE60A181BEDE), SPH_C64(0x701C1CFC386CE01C),
+	SPH_C64(0xD3FDFD46E72EBBFD), SPH_C64(0x294D4D1F9A64524D),
+	SPH_C64(0x7292927639E0E492), SPH_C64(0xC97575FAEABC8F75),
+	SPH_C64(0x180606360C1E3006), SPH_C64(0x128A8AAE0998248A),
+	SPH_C64(0xF2B2B24B7940F9B2), SPH_C64(0xBFE6E685D15963E6),
+	SPH_C64(0x380E0E7E1C36700E), SPH_C64(0x7C1F1FE73E63F81F),
+	SPH_C64(0x95626255C4F73762), SPH_C64(0x77D4D43AB5A3EED4),
+	SPH_C64(0x9AA8A8814D3229A8), SPH_C64(0x6296965231F4C496),
+	SPH_C64(0xC3F9F962EF3A9BF9), SPH_C64(0x33C5C5A397F666C5),
+	SPH_C64(0x942525104AB13525), SPH_C64(0x795959ABB220F259),
+	SPH_C64(0x2A8484D015AE5484), SPH_C64(0xD57272C5E4A7B772),
+	SPH_C64(0xE43939EC72DDD539), SPH_C64(0x2D4C4C1698615A4C),
+	SPH_C64(0x655E5E94BC3BCA5E), SPH_C64(0xFD78789FF085E778),
+	SPH_C64(0xE03838E570D8DD38), SPH_C64(0x0A8C8C980586148C),
+	SPH_C64(0x63D1D117BFB2C6D1), SPH_C64(0xAEA5A5E4570B41A5),
+	SPH_C64(0xAFE2E2A1D94D43E2), SPH_C64(0x9961614EC2F82F61),
+	SPH_C64(0xF6B3B3427B45F1B3), SPH_C64(0x8421213442A51521),
+	SPH_C64(0x4A9C9C0825D6949C), SPH_C64(0x781E1EEE3C66F01E),
+	SPH_C64(0x1143436186522243), SPH_C64(0x3BC7C7B193FC76C7),
+	SPH_C64(0xD7FCFC4FE52BB3FC), SPH_C64(0x1004042408142004),
+	SPH_C64(0x595151E3A208B251), SPH_C64(0x5E9999252FC7BC99),
+	SPH_C64(0xA96D6D22DAC44F6D), SPH_C64(0x340D0D651A39680D),
+	SPH_C64(0xCFFAFA79E93583FA), SPH_C64(0x5BDFDF69A384B6DF),
+	SPH_C64(0xE57E7EA9FC9BD77E), SPH_C64(0x9024241948B43D24),
+	SPH_C64(0xEC3B3BFE76D7C53B), SPH_C64(0x96ABAB9A4B3D31AB),
+	SPH_C64(0x1FCECEF081D13ECE), SPH_C64(0x4411119922558811),
+	SPH_C64(0x068F8F8303890C8F), SPH_C64(0x254E4E049C6B4A4E),
+	SPH_C64(0xE6B7B7667351D1B7), SPH_C64(0x8BEBEBE0CB600BEB),
+	SPH_C64(0xF03C3CC178CCFD3C), SPH_C64(0x3E8181FD1FBF7C81),
+	SPH_C64(0x6A94944035FED494), SPH_C64(0xFBF7F71CF30CEBF7),
+	SPH_C64(0xDEB9B9186F67A1B9), SPH_C64(0x4C13138B265F9813),
+	SPH_C64(0xB02C2C51589C7D2C), SPH_C64(0x6BD3D305BBB8D6D3),
+	SPH_C64(0xBBE7E78CD35C6BE7), SPH_C64(0xA56E6E39DCCB576E),
+	SPH_C64(0x37C4C4AA95F36EC4), SPH_C64(0x0C03031B060F1803),
+	SPH_C64(0x455656DCAC138A56), SPH_C64(0x0D44445E88491A44),
+	SPH_C64(0xE17F7FA0FE9EDF7F), SPH_C64(0x9EA9A9884F3721A9),
+	SPH_C64(0xA82A2A6754824D2A), SPH_C64(0xD6BBBB0A6B6DB1BB),
+	SPH_C64(0x23C1C1879FE246C1), SPH_C64(0x515353F1A602A253),
+	SPH_C64(0x57DCDC72A58BAEDC), SPH_C64(0x2C0B0B531627580B),
+	SPH_C64(0x4E9D9D0127D39C9D), SPH_C64(0xAD6C6C2BD8C1476C),
+	SPH_C64(0xC43131A462F59531), SPH_C64(0xCD7474F3E8B98774),
+	SPH_C64(0xFFF6F615F109E3F6), SPH_C64(0x0546464C8C430A46),
+	SPH_C64(0x8AACACA5452609AC), SPH_C64(0x1E8989B50F973C89),
+	SPH_C64(0x501414B42844A014), SPH_C64(0xA3E1E1BADF425BE1),
+	SPH_C64(0x581616A62C4EB016), SPH_C64(0xE83A3AF774D2CD3A),
+	SPH_C64(0xB9696906D2D06F69), SPH_C64(0x24090941122D4809),
+	SPH_C64(0xDD7070D7E0ADA770), SPH_C64(0xE2B6B66F7154D9B6),
+	SPH_C64(0x67D0D01EBDB7CED0), SPH_C64(0x93EDEDD6C77E3BED),
+	SPH_C64(0x17CCCCE285DB2ECC), SPH_C64(0x1542426884572A42),
+	SPH_C64(0x5A98982C2DC2B498), SPH_C64(0xAAA4A4ED550E49A4),
+	SPH_C64(0xA028287550885D28), SPH_C64(0x6D5C5C86B831DA5C),
+	SPH_C64(0xC7F8F86BED3F93F8), SPH_C64(0x228686C211A44486)
+};
+
+static const uint64_t plain_T6[256] = {
+	SPH_C64(0x1818D83078C01860), SPH_C64(0x23232646AF05238C),
+	SPH_C64(0xC6C6B891F97EC63F), SPH_C64(0xE8E8FBCD6F13E887),
+	SPH_C64(0x8787CB13A14C8726), SPH_C64(0xB8B8116D62A9B8DA),
+	SPH_C64(0x0101090205080104), SPH_C64(0x4F4F0D9E6E424F21),
+	SPH_C64(0x36369B6CEEAD36D8), SPH_C64(0xA6A6FF510459A6A2),
+	SPH_C64(0xD2D20CB9BDDED26F), SPH_C64(0xF5F50EF706FBF5F3),
+	SPH_C64(0x797996F280EF79F9), SPH_C64(0x6F6F30DECE5F6FA1),
+	SPH_C64(0x91916D3FEFFC917E), SPH_C64(0x5252F8A407AA5255),
+	SPH_C64(0x606047C0FD27609D), SPH_C64(0xBCBC35657689BCCA),
+	SPH_C64(0x9B9B372BCDAC9B56), SPH_C64(0x8E8E8A018C048E02),
+	SPH_C64(0xA3A3D25B1571A3B6), SPH_C64(0x0C0C6C183C600C30),
+	SPH_C64(0x7B7B84F68AFF7BF1), SPH_C64(0x3535806AE1B535D4),
+	SPH_C64(0x1D1DF53A69E81D74), SPH_C64(0xE0E0B3DD4753E0A7),
+	SPH_C64(0xD7D721B3ACF6D77B), SPH_C64(0xC2C29C99ED5EC22F),
+	SPH_C64(0x2E2E435C966D2EB8), SPH_C64(0x4B4B29967A624B31),
+	SPH_C64(0xFEFE5DE121A3FEDF), SPH_C64(0x5757D5AE16825741),
+	SPH_C64(0x1515BD2A41A81554), SPH_C64(0x7777E8EEB69F77C1),
+	SPH_C64(0x3737926EEBA537DC), SPH_C64(0xE5E59ED7567BE5B3),
+	SPH_C64(0x9F9F1323D98C9F46), SPH_C64(0xF0F023FD17D3F0E7),
+	SPH_C64(0x4A4A20947F6A4A35), SPH_C64(0xDADA44A9959EDA4F),
+	SPH_C64(0x5858A2B025FA587D), SPH_C64(0xC9C9CF8FCA06C903),
+	SPH_C64(0x29297C528D5529A4), SPH_C64(0x0A0A5A1422500A28),
+	SPH_C64(0xB1B1507F4FE1B1FE), SPH_C64(0xA0A0C95D1A69A0BA),
+	SPH_C64(0x6B6B14D6DA7F6BB1), SPH_C64(0x8585D917AB5C852E),
+	SPH_C64(0xBDBD3C677381BDCE), SPH_C64(0x5D5D8FBA34D25D69),
+	SPH_C64(0x1010902050801040), SPH_C64(0xF4F407F503F3F4F7),
+	SPH_C64(0xCBCBDD8BC016CB0B), SPH_C64(0x3E3ED37CC6ED3EF8),
+	SPH_C64(0x05052D0A11280514), SPH_C64(0x676778CEE61F6781),
+	SPH_C64(0xE4E497D55373E4B7), SPH_C64(0x2727024EBB25279C),
+	SPH_C64(0x4141738258324119), SPH_C64(0x8B8BA70B9D2C8B16),
+	SPH_C64(0xA7A7F6530151A7A6), SPH_C64(0x7D7DB2FA94CF7DE9),
+	SPH_C64(0x95954937FBDC956E), SPH_C64(0xD8D856AD9F8ED847),
+	SPH_C64(0xFBFB70EB308BFBCB), SPH_C64(0xEEEECDC17123EE9F),
+	SPH_C64(0x7C7CBBF891C77CED), SPH_C64(0x666671CCE3176685),
+	SPH_C64(0xDDDD7BA78EA6DD53), SPH_C64(0x1717AF2E4BB8175C),
+	SPH_C64(0x4747458E46024701), SPH_C64(0x9E9E1A21DC849E42),
+	SPH_C64(0xCACAD489C51ECA0F), SPH_C64(0x2D2D585A99752DB4),
+	SPH_C64(0xBFBF2E637991BFC6), SPH_C64(0x07073F0E1B38071C),
+	SPH_C64(0xADADAC472301AD8E), SPH_C64(0x5A5AB0B42FEA5A75),
+	SPH_C64(0x8383EF1BB56C8336), SPH_C64(0x3333B666FF8533CC),
+	SPH_C64(0x63635CC6F23F6391), SPH_C64(0x020212040A100208),
+	SPH_C64(0xAAAA93493839AA92), SPH_C64(0x7171DEE2A8AF71D9),
+	SPH_C64(0xC8C8C68DCF0EC807), SPH_C64(0x1919D1327DC81964),
+	SPH_C64(0x49493B9270724939), SPH_C64(0xD9D95FAF9A86D943),
+	SPH_C64(0xF2F231F91DC3F2EF), SPH_C64(0xE3E3A8DB484BE3AB),
+	SPH_C64(0x5B5BB9B62AE25B71), SPH_C64(0x8888BC0D9234881A),
+	SPH_C64(0x9A9A3E29C8A49A52), SPH_C64(0x26260B4CBE2D2698),
+	SPH_C64(0x3232BF64FA8D32C8), SPH_C64(0xB0B0597D4AE9B0FA),
+	SPH_C64(0xE9E9F2CF6A1BE983), SPH_C64(0x0F0F771E33780F3C),
+	SPH_C64(0xD5D533B7A6E6D573), SPH_C64(0x8080F41DBA74803A),
+	SPH_C64(0xBEBE27617C99BEC2), SPH_C64(0xCDCDEB87DE26CD13),
+	SPH_C64(0x34348968E4BD34D0), SPH_C64(0x48483290757A483D),
+	SPH_C64(0xFFFF54E324ABFFDB), SPH_C64(0x7A7A8DF48FF77AF5),
+	SPH_C64(0x9090643DEAF4907A), SPH_C64(0x5F5F9DBE3EC25F61),
+	SPH_C64(0x20203D40A01D2080), SPH_C64(0x68680FD0D56768BD),
+	SPH_C64(0x1A1ACA3472D01A68), SPH_C64(0xAEAEB7412C19AE82),
+	SPH_C64(0xB4B47D755EC9B4EA), SPH_C64(0x5454CEA8199A544D),
+	SPH_C64(0x93937F3BE5EC9376), SPH_C64(0x22222F44AA0D2288),
+	SPH_C64(0x646463C8E907648D), SPH_C64(0xF1F12AFF12DBF1E3),
+	SPH_C64(0x7373CCE6A2BF73D1), SPH_C64(0x121282245A901248),
+	SPH_C64(0x40407A805D3A401D), SPH_C64(0x0808481028400820),
+	SPH_C64(0xC3C3959BE856C32B), SPH_C64(0xECECDFC57B33EC97),
+	SPH_C64(0xDBDB4DAB9096DB4B), SPH_C64(0xA1A1C05F1F61A1BE),
+	SPH_C64(0x8D8D9107831C8D0E), SPH_C64(0x3D3DC87AC9F53DF4),
+	SPH_C64(0x97975B33F1CC9766), SPH_C64(0x0000000000000000),
+	SPH_C64(0xCFCFF983D436CF1B), SPH_C64(0x2B2B6E5687452BAC),
+	SPH_C64(0x7676E1ECB39776C5), SPH_C64(0x8282E619B0648232),
+	SPH_C64(0xD6D628B1A9FED67F), SPH_C64(0x1B1BC33677D81B6C),
+	SPH_C64(0xB5B574775BC1B5EE), SPH_C64(0xAFAFBE432911AF86),
+	SPH_C64(0x6A6A1DD4DF776AB5), SPH_C64(0x5050EAA00DBA505D),
+	SPH_C64(0x4545578A4C124509), SPH_C64(0xF3F338FB18CBF3EB),
+	SPH_C64(0x3030AD60F09D30C0), SPH_C64(0xEFEFC4C3742BEF9B),
+	SPH_C64(0x3F3FDA7EC3E53FFC), SPH_C64(0x5555C7AA1C925549),
+	SPH_C64(0xA2A2DB591079A2B2), SPH_C64(0xEAEAE9C96503EA8F),
+	SPH_C64(0x65656ACAEC0F6589), SPH_C64(0xBABA036968B9BAD2),
+	SPH_C64(0x2F2F4A5E93652FBC), SPH_C64(0xC0C08E9DE74EC027),
+	SPH_C64(0xDEDE60A181BEDE5F), SPH_C64(0x1C1CFC386CE01C70),
+	SPH_C64(0xFDFD46E72EBBFDD3), SPH_C64(0x4D4D1F9A64524D29),
+	SPH_C64(0x92927639E0E49272), SPH_C64(0x7575FAEABC8F75C9),
+	SPH_C64(0x0606360C1E300618), SPH_C64(0x8A8AAE0998248A12),
+	SPH_C64(0xB2B24B7940F9B2F2), SPH_C64(0xE6E685D15963E6BF),
+	SPH_C64(0x0E0E7E1C36700E38), SPH_C64(0x1F1FE73E63F81F7C),
+	SPH_C64(0x626255C4F7376295), SPH_C64(0xD4D43AB5A3EED477),
+	SPH_C64(0xA8A8814D3229A89A), SPH_C64(0x96965231F4C49662),
+	SPH_C64(0xF9F962EF3A9BF9C3), SPH_C64(0xC5C5A397F666C533),
+	SPH_C64(0x2525104AB1352594), SPH_C64(0x5959ABB220F25979),
+	SPH_C64(0x8484D015AE54842A), SPH_C64(0x7272C5E4A7B772D5),
+	SPH_C64(0x3939EC72DDD539E4), SPH_C64(0x4C4C1698615A4C2D),
+	SPH_C64(0x5E5E94BC3BCA5E65), SPH_C64(0x78789FF085E778FD),
+	SPH_C64(0x3838E570D8DD38E0), SPH_C64(0x8C8C980586148C0A),
+	SPH_C64(0xD1D117BFB2C6D163), SPH_C64(0xA5A5E4570B41A5AE),
+	SPH_C64(0xE2E2A1D94D43E2AF), SPH_C64(0x61614EC2F82F6199),
+	SPH_C64(0xB3B3427B45F1B3F6), SPH_C64(0x21213442A5152184),
+	SPH_C64(0x9C9C0825D6949C4A), SPH_C64(0x1E1EEE3C66F01E78),
+	SPH_C64(0x4343618652224311), SPH_C64(0xC7C7B193FC76C73B),
+	SPH_C64(0xFCFC4FE52BB3FCD7), SPH_C64(0x0404240814200410),
+	SPH_C64(0x5151E3A208B25159), SPH_C64(0x9999252FC7BC995E),
+	SPH_C64(0x6D6D22DAC44F6DA9), SPH_C64(0x0D0D651A39680D34),
+	SPH_C64(0xFAFA79E93583FACF), SPH_C64(0xDFDF69A384B6DF5B),
+	SPH_C64(0x7E7EA9FC9BD77EE5), SPH_C64(0x24241948B43D2490),
+	SPH_C64(0x3B3BFE76D7C53BEC), SPH_C64(0xABAB9A4B3D31AB96),
+	SPH_C64(0xCECEF081D13ECE1F), SPH_C64(0x1111992255881144),
+	SPH_C64(0x8F8F8303890C8F06), SPH_C64(0x4E4E049C6B4A4E25),
+	SPH_C64(0xB7B7667351D1B7E6), SPH_C64(0xEBEBE0CB600BEB8B),
+	SPH_C64(0x3C3CC178CCFD3CF0), SPH_C64(0x8181FD1FBF7C813E),
+	SPH_C64(0x94944035FED4946A), SPH_C64(0xF7F71CF30CEBF7FB),
+	SPH_C64(0xB9B9186F67A1B9DE), SPH_C64(0x13138B265F98134C),
+	SPH_C64(0x2C2C51589C7D2CB0), SPH_C64(0xD3D305BBB8D6D36B),
+	SPH_C64(0xE7E78CD35C6BE7BB), SPH_C64(0x6E6E39DCCB576EA5),
+	SPH_C64(0xC4C4AA95F36EC437), SPH_C64(0x03031B060F18030C),
+	SPH_C64(0x5656DCAC138A5645), SPH_C64(0x44445E88491A440D),
+	SPH_C64(0x7F7FA0FE9EDF7FE1), SPH_C64(0xA9A9884F3721A99E),
+	SPH_C64(0x2A2A6754824D2AA8), SPH_C64(0xBBBB0A6B6DB1BBD6),
+	SPH_C64(0xC1C1879FE246C123), SPH_C64(0x5353F1A602A25351),
+	SPH_C64(0xDCDC72A58BAEDC57), SPH_C64(0x0B0B531627580B2C),
+	SPH_C64(0x9D9D0127D39C9D4E), SPH_C64(0x6C6C2BD8C1476CAD),
+	SPH_C64(0x3131A462F59531C4), SPH_C64(0x7474F3E8B98774CD),
+	SPH_C64(0xF6F615F109E3F6FF), SPH_C64(0x46464C8C430A4605),
+	SPH_C64(0xACACA5452609AC8A), SPH_C64(0x8989B50F973C891E),
+	SPH_C64(0x1414B42844A01450), SPH_C64(0xE1E1BADF425BE1A3),
+	SPH_C64(0x1616A62C4EB01658), SPH_C64(0x3A3AF774D2CD3AE8),
+	SPH_C64(0x696906D2D06F69B9), SPH_C64(0x090941122D480924),
+	SPH_C64(0x7070D7E0ADA770DD), SPH_C64(0xB6B66F7154D9B6E2),
+	SPH_C64(0xD0D01EBDB7CED067), SPH_C64(0xEDEDD6C77E3BED93),
+	SPH_C64(0xCCCCE285DB2ECC17), SPH_C64(0x42426884572A4215),
+	SPH_C64(0x98982C2DC2B4985A), SPH_C64(0xA4A4ED550E49A4AA),
+	SPH_C64(0x28287550885D28A0), SPH_C64(0x5C5C86B831DA5C6D),
+	SPH_C64(0xF8F86BED3F93F8C7), SPH_C64(0x8686C211A4448622)
+};
+
+static const uint64_t plain_T7[256] = {
+	SPH_C64(0x18D83078C0186018), SPH_C64(0x232646AF05238C23),
+	SPH_C64(0xC6B891F97EC63FC6), SPH_C64(0xE8FBCD6F13E887E8),
+	SPH_C64(0x87CB13A14C872687), SPH_C64(0xB8116D62A9B8DAB8),
+	SPH_C64(0x0109020508010401), SPH_C64(0x4F0D9E6E424F214F),
+	SPH_C64(0x369B6CEEAD36D836), SPH_C64(0xA6FF510459A6A2A6),
+	SPH_C64(0xD20CB9BDDED26FD2), SPH_C64(0xF50EF706FBF5F3F5),
+	SPH_C64(0x7996F280EF79F979), SPH_C64(0x6F30DECE5F6FA16F),
+	SPH_C64(0x916D3FEFFC917E91), SPH_C64(0x52F8A407AA525552),
+	SPH_C64(0x6047C0FD27609D60), SPH_C64(0xBC35657689BCCABC),
+	SPH_C64(0x9B372BCDAC9B569B), SPH_C64(0x8E8A018C048E028E),
+	SPH_C64(0xA3D25B1571A3B6A3), SPH_C64(0x0C6C183C600C300C),
+	SPH_C64(0x7B84F68AFF7BF17B), SPH_C64(0x35806AE1B535D435),
+	SPH_C64(0x1DF53A69E81D741D), SPH_C64(0xE0B3DD4753E0A7E0),
+	SPH_C64(0xD721B3ACF6D77BD7), SPH_C64(0xC29C99ED5EC22FC2),
+	SPH_C64(0x2E435C966D2EB82E), SPH_C64(0x4B29967A624B314B),
+	SPH_C64(0xFE5DE121A3FEDFFE), SPH_C64(0x57D5AE1682574157),
+	SPH_C64(0x15BD2A41A8155415), SPH_C64(0x77E8EEB69F77C177),
+	SPH_C64(0x37926EEBA537DC37), SPH_C64(0xE59ED7567BE5B3E5),
+	SPH_C64(0x9F1323D98C9F469F), SPH_C64(0xF023FD17D3F0E7F0),
+	SPH_C64(0x4A20947F6A4A354A), SPH_C64(0xDA44A9959EDA4FDA),
+	SPH_C64(0x58A2B025FA587D58), SPH_C64(0xC9CF8FCA06C903C9),
+	SPH_C64(0x297C528D5529A429), SPH_C64(0x0A5A1422500A280A),
+	SPH_C64(0xB1507F4FE1B1FEB1), SPH_C64(0xA0C95D1A69A0BAA0),
+	SPH_C64(0x6B14D6DA7F6BB16B), SPH_C64(0x85D917AB5C852E85),
+	SPH_C64(0xBD3C677381BDCEBD), SPH_C64(0x5D8FBA34D25D695D),
+	SPH_C64(0x1090205080104010), SPH_C64(0xF407F503F3F4F7F4),
+	SPH_C64(0xCBDD8BC016CB0BCB), SPH_C64(0x3ED37CC6ED3EF83E),
+	SPH_C64(0x052D0A1128051405), SPH_C64(0x6778CEE61F678167),
+	SPH_C64(0xE497D55373E4B7E4), SPH_C64(0x27024EBB25279C27),
+	SPH_C64(0x4173825832411941), SPH_C64(0x8BA70B9D2C8B168B),
+	SPH_C64(0xA7F6530151A7A6A7), SPH_C64(0x7DB2FA94CF7DE97D),
+	SPH_C64(0x954937FBDC956E95), SPH_C64(0xD856AD9F8ED847D8),
+	SPH_C64(0xFB70EB308BFBCBFB), SPH_C64(0xEECDC17123EE9FEE),
+	SPH_C64(0x7CBBF891C77CED7C), SPH_C64(0x6671CCE317668566),
+	SPH_C64(0xDD7BA78EA6DD53DD), SPH_C64(0x17AF2E4BB8175C17),
+	SPH_C64(0x47458E4602470147), SPH_C64(0x9E1A21DC849E429E),
+	SPH_C64(0xCAD489C51ECA0FCA), SPH_C64(0x2D585A99752DB42D),
+	SPH_C64(0xBF2E637991BFC6BF), SPH_C64(0x073F0E1B38071C07),
+	SPH_C64(0xADAC472301AD8EAD), SPH_C64(0x5AB0B42FEA5A755A),
+	SPH_C64(0x83EF1BB56C833683), SPH_C64(0x33B666FF8533CC33),
+	SPH_C64(0x635CC6F23F639163), SPH_C64(0x0212040A10020802),
+	SPH_C64(0xAA93493839AA92AA), SPH_C64(0x71DEE2A8AF71D971),
+	SPH_C64(0xC8C68DCF0EC807C8), SPH_C64(0x19D1327DC8196419),
+	SPH_C64(0x493B927072493949), SPH_C64(0xD95FAF9A86D943D9),
+	SPH_C64(0xF231F91DC3F2EFF2), SPH_C64(0xE3A8DB484BE3ABE3),
+	SPH_C64(0x5BB9B62AE25B715B), SPH_C64(0x88BC0D9234881A88),
+	SPH_C64(0x9A3E29C8A49A529A), SPH_C64(0x260B4CBE2D269826),
+	SPH_C64(0x32BF64FA8D32C832), SPH_C64(0xB0597D4AE9B0FAB0),
+	SPH_C64(0xE9F2CF6A1BE983E9), SPH_C64(0x0F771E33780F3C0F),
+	SPH_C64(0xD533B7A6E6D573D5), SPH_C64(0x80F41DBA74803A80),
+	SPH_C64(0xBE27617C99BEC2BE), SPH_C64(0xCDEB87DE26CD13CD),
+	SPH_C64(0x348968E4BD34D034), SPH_C64(0x483290757A483D48),
+	SPH_C64(0xFF54E324ABFFDBFF), SPH_C64(0x7A8DF48FF77AF57A),
+	SPH_C64(0x90643DEAF4907A90), SPH_C64(0x5F9DBE3EC25F615F),
+	SPH_C64(0x203D40A01D208020), SPH_C64(0x680FD0D56768BD68),
+	SPH_C64(0x1ACA3472D01A681A), SPH_C64(0xAEB7412C19AE82AE),
+	SPH_C64(0xB47D755EC9B4EAB4), SPH_C64(0x54CEA8199A544D54),
+	SPH_C64(0x937F3BE5EC937693), SPH_C64(0x222F44AA0D228822),
+	SPH_C64(0x6463C8E907648D64), SPH_C64(0xF12AFF12DBF1E3F1),
+	SPH_C64(0x73CCE6A2BF73D173), SPH_C64(0x1282245A90124812),
+	SPH_C64(0x407A805D3A401D40), SPH_C64(0x0848102840082008),
+	SPH_C64(0xC3959BE856C32BC3), SPH_C64(0xECDFC57B33EC97EC),
+	SPH_C64(0xDB4DAB9096DB4BDB), SPH_C64(0xA1C05F1F61A1BEA1),
+	SPH_C64(0x8D9107831C8D0E8D), SPH_C64(0x3DC87AC9F53DF43D),
+	SPH_C64(0x975B33F1CC976697), SPH_C64(0x0000000000000000),
+	SPH_C64(0xCFF983D436CF1BCF), SPH_C64(0x2B6E5687452BAC2B),
+	SPH_C64(0x76E1ECB39776C576), SPH_C64(0x82E619B064823282),
+	SPH_C64(0xD628B1A9FED67FD6), SPH_C64(0x1BC33677D81B6C1B),
+	SPH_C64(0xB574775BC1B5EEB5), SPH_C64(0xAFBE432911AF86AF),
+	SPH_C64(0x6A1DD4DF776AB56A), SPH_C64(0x50EAA00DBA505D50),
+	SPH_C64(0x45578A4C12450945), SPH_C64(0xF338FB18CBF3EBF3),
+	SPH_C64(0x30AD60F09D30C030), SPH_C64(0xEFC4C3742BEF9BEF),
+	SPH_C64(0x3FDA7EC3E53FFC3F), SPH_C64(0x55C7AA1C92554955),
+	SPH_C64(0xA2DB591079A2B2A2), SPH_C64(0xEAE9C96503EA8FEA),
+	SPH_C64(0x656ACAEC0F658965), SPH_C64(0xBA036968B9BAD2BA),
+	SPH_C64(0x2F4A5E93652FBC2F), SPH_C64(0xC08E9DE74EC027C0),
+	SPH_C64(0xDE60A181BEDE5FDE), SPH_C64(0x1CFC386CE01C701C),
+	SPH_C64(0xFD46E72EBBFDD3FD), SPH_C64(0x4D1F9A64524D294D),
+	SPH_C64(0x927639E0E4927292), SPH_C64(0x75FAEABC8F75C975),
+	SPH_C64(0x06360C1E30061806), SPH_C64(0x8AAE0998248A128A),
+	SPH_C64(0xB24B7940F9B2F2B2), SPH_C64(0xE685D15963E6BFE6),
+	SPH_C64(0x0E7E1C36700E380E), SPH_C64(0x1FE73E63F81F7C1F),
+	SPH_C64(0x6255C4F737629562), SPH_C64(0xD43AB5A3EED477D4),
+	SPH_C64(0xA8814D3229A89AA8), SPH_C64(0x965231F4C4966296),
+	SPH_C64(0xF962EF3A9BF9C3F9), SPH_C64(0xC5A397F666C533C5),
+	SPH_C64(0x25104AB135259425), SPH_C64(0x59ABB220F2597959),
+	SPH_C64(0x84D015AE54842A84), SPH_C64(0x72C5E4A7B772D572),
+	SPH_C64(0x39EC72DDD539E439), SPH_C64(0x4C1698615A4C2D4C),
+	SPH_C64(0x5E94BC3BCA5E655E), SPH_C64(0x789FF085E778FD78),
+	SPH_C64(0x38E570D8DD38E038), SPH_C64(0x8C980586148C0A8C),
+	SPH_C64(0xD117BFB2C6D163D1), SPH_C64(0xA5E4570B41A5AEA5),
+	SPH_C64(0xE2A1D94D43E2AFE2), SPH_C64(0x614EC2F82F619961),
+	SPH_C64(0xB3427B45F1B3F6B3), SPH_C64(0x213442A515218421),
+	SPH_C64(0x9C0825D6949C4A9C), SPH_C64(0x1EEE3C66F01E781E),
+	SPH_C64(0x4361865222431143), SPH_C64(0xC7B193FC76C73BC7),
+	SPH_C64(0xFC4FE52BB3FCD7FC), SPH_C64(0x0424081420041004),
+	SPH_C64(0x51E3A208B2515951), SPH_C64(0x99252FC7BC995E99),
+	SPH_C64(0x6D22DAC44F6DA96D), SPH_C64(0x0D651A39680D340D),
+	SPH_C64(0xFA79E93583FACFFA), SPH_C64(0xDF69A384B6DF5BDF),
+	SPH_C64(0x7EA9FC9BD77EE57E), SPH_C64(0x241948B43D249024),
+	SPH_C64(0x3BFE76D7C53BEC3B), SPH_C64(0xAB9A4B3D31AB96AB),
+	SPH_C64(0xCEF081D13ECE1FCE), SPH_C64(0x1199225588114411),
+	SPH_C64(0x8F8303890C8F068F), SPH_C64(0x4E049C6B4A4E254E),
+	SPH_C64(0xB7667351D1B7E6B7), SPH_C64(0xEBE0CB600BEB8BEB),
+	SPH_C64(0x3CC178CCFD3CF03C), SPH_C64(0x81FD1FBF7C813E81),
+	SPH_C64(0x944035FED4946A94), SPH_C64(0xF71CF30CEBF7FBF7),
+	SPH_C64(0xB9186F67A1B9DEB9), SPH_C64(0x138B265F98134C13),
+	SPH_C64(0x2C51589C7D2CB02C), SPH_C64(0xD305BBB8D6D36BD3),
+	SPH_C64(0xE78CD35C6BE7BBE7), SPH_C64(0x6E39DCCB576EA56E),
+	SPH_C64(0xC4AA95F36EC437C4), SPH_C64(0x031B060F18030C03),
+	SPH_C64(0x56DCAC138A564556), SPH_C64(0x445E88491A440D44),
+	SPH_C64(0x7FA0FE9EDF7FE17F), SPH_C64(0xA9884F3721A99EA9),
+	SPH_C64(0x2A6754824D2AA82A), SPH_C64(0xBB0A6B6DB1BBD6BB),
+	SPH_C64(0xC1879FE246C123C1), SPH_C64(0x53F1A602A2535153),
+	SPH_C64(0xDC72A58BAEDC57DC), SPH_C64(0x0B531627580B2C0B),
+	SPH_C64(0x9D0127D39C9D4E9D), SPH_C64(0x6C2BD8C1476CAD6C),
+	SPH_C64(0x31A462F59531C431), SPH_C64(0x74F3E8B98774CD74),
+	SPH_C64(0xF615F109E3F6FFF6), SPH_C64(0x464C8C430A460546),
+	SPH_C64(0xACA5452609AC8AAC), SPH_C64(0x89B50F973C891E89),
+	SPH_C64(0x14B42844A0145014), SPH_C64(0xE1BADF425BE1A3E1),
+	SPH_C64(0x16A62C4EB0165816), SPH_C64(0x3AF774D2CD3AE83A),
+	SPH_C64(0x6906D2D06F69B969), SPH_C64(0x0941122D48092409),
+	SPH_C64(0x70D7E0ADA770DD70), SPH_C64(0xB66F7154D9B6E2B6),
+	SPH_C64(0xD01EBDB7CED067D0), SPH_C64(0xEDD6C77E3BED93ED),
+	SPH_C64(0xCCE285DB2ECC17CC), SPH_C64(0x426884572A421542),
+	SPH_C64(0x982C2DC2B4985A98), SPH_C64(0xA4ED550E49A4AAA4),
+	SPH_C64(0x287550885D28A028), SPH_C64(0x5C86B831DA5C6D5C),
+	SPH_C64(0xF86BED3F93F8C7F8), SPH_C64(0x86C211A444862286)
+};
+
+/*
+ * Round constants.
+ *
+ * InitVector_RC is the device-side (__constant__) array read by
+ * whirlpool_device_round() and whirlpool_device_finalround(); it is filled
+ * from the host in whirlpool512_cpu_init() with cudaMemcpyToSymbol().
+ * plain_RC is the host-side table: it is one of the two possible sources for
+ * that upload and is also used directly by the host function
+ * whirlpool_round(). One constant is consumed per round (ten rounds).
+ */
+
+ __constant__ uint64_t InitVector_RC[10];
+
+static const uint64_t plain_RC[10] = {
+	SPH_C64(0x4F01B887E8C62318),
+	SPH_C64(0x52916F79F5D2A636),
+	SPH_C64(0x357B0CA38E9BBC60),
+	SPH_C64(0x57FE4B2EC2D7E01D),
+	SPH_C64(0xDA4AF09FE5377715),
+	SPH_C64(0x856BA0B10A29C958),
+	SPH_C64(0x67053ECBF4105DBD),
+	SPH_C64(0xD8957DA78B4127E4),
+	SPH_C64(0x9E4717DD667CEEFB),
+	SPH_C64(0x33835AAD07BF2DCA)
+};
+
+/* ======================================================================
+ * BYTE(x, n) shifts x right by 8*n bits, converts the result to unsigned
+ * and masks it with 0xFF, i.e. it yields byte n of x where n = 0 is the
+ * least significant byte. It is used by the host-side ROUND_ELTo and
+ * ROUND_ELTold table lookups.
+ * ====================================================================== */
+ 
+#define BYTE(x, n)     ((unsigned)((x) >> (8 * (n))) & 0xFF)
+//#define asBYTE(x, n)      byte(x,n)
+/*
+ * Computes one 64-bit element of a round on the device.
+ *
+ * sharedMemory holds eight consecutive 256-entry tables at offsets
+ * 0, 256, 512, ..., 1792. Table k is indexed with the byte at offset k inside
+ * the 64-bit word in[ik] (offset 0 is the byte at the lowest address), and the
+ * eight looked-up values are combined with xor8().
+ *
+ * in must point to at least eight 64-bit words; i0..i7 are word indices 0..7.
+ */
+static __device__ __forceinline__ uint64_t ROUND_ELT(const  uint64_t* __restrict sharedMemory,uint64_t* in,int i0,int i1,int i2,int i3,int i4,int i5,int i6,int i7) 
+{
+	// Byte view of the state: word w starts at byte offset 8*w.
+	const uint8_t* bytes = (const uint8_t*)in;
+
+	const uint64_t e0 = sharedMemory[bytes[8*i0]];
+	const uint64_t e1 = sharedMemory[bytes[8*i1+1] + 256];
+	const uint64_t e2 = sharedMemory[bytes[8*i2+2] + 512];
+	const uint64_t e3 = sharedMemory[bytes[8*i3+3] + 768];
+	const uint64_t e4 = sharedMemory[bytes[8*i4+4] + 1024];
+	const uint64_t e5 = sharedMemory[bytes[8*i5+5] + 1280];
+	const uint64_t e6 = sharedMemory[bytes[8*i6+6] + 1536];
+	const uint64_t e7 = sharedMemory[bytes[8*i7+7] + 1792];
+
+	return xor8(e0, e1, e2, e3, e4, e5, e6, e7);
+}
+
+
+
+/*
+ * Host-side round element: XOR of eight table lookups, where table k of the
+ * family <prefix>0 .. <prefix>7 is indexed with byte k of in[ik].
+ * The table family is selected by token-pasting the prefix with the digit.
+ */
+#define WHIRLPOOL_HOST_ELT(prefix, in, i0, i1, i2, i3, i4, i5, i6, i7) \
+    ( prefix ## 0[BYTE(in[i0], 0)] \
+    ^ prefix ## 1[BYTE(in[i1], 1)] \
+    ^ prefix ## 2[BYTE(in[i2], 2)] \
+    ^ prefix ## 3[BYTE(in[i3], 3)] \
+    ^ prefix ## 4[BYTE(in[i4], 4)] \
+    ^ prefix ## 5[BYTE(in[i5], 5)] \
+    ^ prefix ## 6[BYTE(in[i6], 6)] \
+    ^ prefix ## 7[BYTE(in[i7], 7)])
+
+/* Round element using the plain_T0..plain_T7 tables. */
+#define ROUND_ELTo(in, i0, i1, i2, i3, i4, i5, i6, i7) \
+    WHIRLPOOL_HOST_ELT(plain_T, in, i0, i1, i2, i3, i4, i5, i6, i7)
+
+/* Round element using the old1_T0..old1_T7 tables. */
+#define ROUND_ELTold(in, i0, i1, i2, i3, i4, i5, i6, i7) \
+    WHIRLPOOL_HOST_ELT(old1_T, in, i0, i1, i2, i3, i4, i5, i6, i7)
+
+
+
+/*
+ * Runs ten rounds on the device, updating both arrays in place.
+ *
+ * h and n each point to eight 64-bit words. In every round both arrays are
+ * transformed with ROUND_ELT (row i uses words i, i-1, ..., i-7 modulo 8);
+ * the round constant InitVector_RC[r] is XORed into word 0 of h, and the
+ * updated h is XORed into the transformed n.
+ *
+ * sharedMemory must already contain the eight lookup tables expected by
+ * ROUND_ELT.
+ */
+static __device__ __forceinline__ void whirlpool_device_round(const uint64_t* __restrict sharedMemory,uint64_t* n, uint64_t* h)
+{
+	uint64_t keyRow[8];
+	uint64_t msgRow[8];
+
+#pragma unroll
+	for (unsigned r = 0; r < 10; r++) {
+		// Both transforms are evaluated from the values at round entry,
+		// before either array is overwritten.
+#pragma unroll
+		for (int i = 0; i < 8; i++) {
+			keyRow[i] = ROUND_ELT(sharedMemory, h, i, (i+7)&7, (i+6)&7, (i+5)&7, (i+4)&7, (i+3)&7, (i+2)&7, (i+1)&7);
+		}
+#pragma unroll
+		for (int i = 0; i < 8; i++) {
+			msgRow[i] = ROUND_ELT(sharedMemory, n, i, (i+7)&7, (i+6)&7, (i+5)&7, (i+4)&7, (i+3)&7, (i+2)&7, (i+1)&7);
+		}
+
+		const uint64_t rc = InitVector_RC[r];
+
+		// Only word 0 receives the round constant.
+		h[0] = xor1(keyRow[0], rc);
+#pragma unroll
+		for (int i = 1; i < 8; i++) {
+			h[i] = keyRow[i];
+		}
+
+		n[0] = xor3(msgRow[0], keyRow[0], rc);
+#pragma unroll
+		for (int i = 1; i < 8; i++) {
+			n[i] = xor1(msgRow[i], h[i]);
+		}
+	}
+}
+
+/*
+ * Shortened variant of whirlpool_device_round() for the final target check.
+ *
+ * Nine complete rounds are applied to h and n. Of the tenth round only
+ * word 3 is computed: on return h[3] and n[3] hold the tenth-round values,
+ * while every other word still holds its value after round nine.
+ *
+ * sharedMemory must already contain the eight lookup tables expected by
+ * ROUND_ELT.
+ */
+static __device__ __forceinline__ void whirlpool_device_finalround(const uint64_t* __restrict sharedMemory,uint64_t* n, uint64_t* h)
+{
+	uint64_t row[8];
+
+#pragma unroll
+	for (unsigned r = 0; r < 9; r++) {
+		// Transform h; the round constant goes into word 0 only.
+#pragma unroll
+		for (int i = 0; i < 8; i++) {
+			row[i] = ROUND_ELT(sharedMemory, h, i, (i+7)&7, (i+6)&7, (i+5)&7, (i+4)&7, (i+3)&7, (i+2)&7, (i+1)&7);
+		}
+		h[0] = xor1(row[0], InitVector_RC[r]);
+#pragma unroll
+		for (int i = 1; i < 8; i++) {
+			h[i] = row[i];
+		}
+
+		// Transform n and mix in the freshly updated h.
+#pragma unroll
+		for (int i = 0; i < 8; i++) {
+			row[i] = ROUND_ELT(sharedMemory, n, i, (i+7)&7, (i+6)&7, (i+5)&7, (i+4)&7, (i+3)&7, (i+2)&7, (i+1)&7);
+		}
+#pragma unroll
+		for (int i = 0; i < 8; i++) {
+			n[i] = xor1(row[i], h[i]);
+		}
+	}
+
+	// Tenth round, word 3 only (no round constant is involved for word 3).
+	h[3] = ROUND_ELT(sharedMemory, h, 3, 2, 1, 0, 7, 6, 5, 4);
+	const uint64_t last = ROUND_ELT(sharedMemory, n, 3, 2, 1, 0, 7, 6, 5, 4);
+	n[3] = xor1(last, h[3]);
+}
+
+
+/*
+ * Host-side ten-round transform using the plain_T0..plain_T7 tables and the
+ * plain_RC round constants. n and h each point to eight 64-bit words and are
+ * both updated in place.
+ */
+static  void whirlpool_round(uint64_t* n, uint64_t* h){
+    uint64_t next[8];
+
+    for (unsigned r = 0; r < 10; r ++) {
+        // Transform h from its values at round entry; word 0 gets the constant.
+        for (int i = 0; i < 8; i ++) {
+            next[i] = ROUND_ELTo(h, i, (i+7)&7, (i+6)&7, (i+5)&7, (i+4)&7, (i+3)&7, (i+2)&7, (i+1)&7);
+        }
+        next[0] ^= plain_RC[r];
+        for (int i = 0; i < 8; i ++) {
+            h[i] = next[i];
+        }
+
+        // Transform n and XOR in the updated h.
+        for (int i = 0; i < 8; i ++) {
+            next[i] = ROUND_ELTo(n, i, (i+7)&7, (i+6)&7, (i+5)&7, (i+4)&7, (i+3)&7, (i+2)&7, (i+1)&7) ^ h[i];
+        }
+        for (int i = 0; i < 8; i ++) {
+            n[i] = next[i];
+        }
+    }
+}
+
+/*
+ * Host-side ten-round transform using the old1_T0..old1_T7 tables and the
+ * old1_RC round constants. n and h each point to eight 64-bit words and are
+ * both updated in place.
+ */
+static  void whirlpool_round_old(uint64_t* n, uint64_t* h){
+    uint64_t next[8];
+
+    for (unsigned r = 0; r < 10; r ++) {
+        // Transform h from its values at round entry; word 0 gets the constant.
+        for (int i = 0; i < 8; i ++) {
+            next[i] = ROUND_ELTold(h, i, (i+7)&7, (i+6)&7, (i+5)&7, (i+4)&7, (i+3)&7, (i+2)&7, (i+1)&7);
+        }
+        next[0] ^= old1_RC[r];
+        for (int i = 0; i < 8; i ++) {
+            h[i] = next[i];
+        }
+
+        // Transform n and XOR in the updated h.
+        for (int i = 0; i < 8; i ++) {
+            next[i] = ROUND_ELTold(n, i, (i+7)&7, (i+6)&7, (i+5)&7, (i+4)&7, (i+3)&7, (i+2)&7, (i+1)&7) ^ h[i];
+        }
+        for (int i = 0; i < 8; i ++) {
+            n[i] = next[i];
+        }
+    }
+}
+
+/*
+ * Hashes the second 64-byte block of an 80-byte input, one nonce per thread.
+ *
+ * The midstate of the first block is read from `stateo`; words 8..10 of
+ * c_PaddedMessage80 supply the remaining input, with the byte-swapped nonce
+ * written into 32-bit word 3 of the block. The eight resulting 64-bit words
+ * are stored at outputHash + 8 * thread.
+ *
+ * Launch: 1-D grid and block. Threads 0..255 of each block fill the shared
+ * lookup table, so blockDim.x must be at least 256 (the host wrapper uses 512).
+ * Uses 16 KB of static shared memory.
+ */
+__global__ void whirlpool512_gpu_hash_80(int threads, uint32_t startNounce, void *outputHash)
+{
+	__shared__ uint64_t sharedMemory[2048];
+	if (threadIdx.x < 256)
+	{
+		sharedMemory[threadIdx.x]      = T0[threadIdx.x];
+		sharedMemory[threadIdx.x+256]  = T1[threadIdx.x];
+		sharedMemory[threadIdx.x+512]  = T2[threadIdx.x];
+		sharedMemory[threadIdx.x+768]  = T3[threadIdx.x];
+		sharedMemory[threadIdx.x+1024] = T4[threadIdx.x];
+		sharedMemory[threadIdx.x+1280] = T5[threadIdx.x];
+		sharedMemory[threadIdx.x+1536] = T6[threadIdx.x];
+		sharedMemory[threadIdx.x+1792] = T7[threadIdx.x];
+	}
+
+	// The table is written by threads 0..255 but read by every thread of the
+	// block, so a block-wide barrier is required before the first lookup.
+	// It sits outside any divergent branch so that all threads reach it.
+	__syncthreads();
+
+	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	if (thread < threads)
+	{
+		uint32_t nounce = startNounce + thread;
+
+		uint64_t state[8];
+		uint64_t n[8];
+		uint64_t h[8];
+
+		// Second message block: 16 bytes of input, then padding and length.
+		n[0] = c_PaddedMessage80[8];
+		n[1] = c_PaddedMessage80[9];
+		((uint32_t*)n)[3] = cuda_swab32(nounce);   // nonce replaces the upper half of n[1]
+		uint64_t tempnonce = n[1];                 // remembered for the feed-forward below
+		n[2] = c_PaddedMessage80[10];
+		n[3] = 0;
+		n[4] = 0;
+		n[5] = 0;
+		n[6] = 0;
+		n[7] = 0x8002000000000000;
+
+#pragma unroll 8
+		for (int i = 0; i < 8; i++) {
+			h[i] = stateo[i];             // midstate of the first block
+		}
+#pragma unroll 8
+		for (int i = 0; i < 8; i++) {
+			n[i] = xor1(n[i], h[i]);
+		}
+
+		whirlpool_device_round(sharedMemory, n, h);
+
+		// Feed-forward: previous state ^ round output ^ message block.
+		state[0] = xor3(stateo[0], n[0], c_PaddedMessage80[8]);
+		state[1] = xor3(stateo[1], n[1], tempnonce);
+		state[2] = xor3(stateo[2], n[2], c_PaddedMessage80[10]);
+		state[3] = xor1(stateo[3], n[3]);
+		state[4] = xor1(stateo[4], n[4]);
+		state[5] = xor1(stateo[5], n[5]);
+		state[6] = xor1(stateo[6], n[6]);
+		state[7] = xor3(stateo[7], n[7], 0x8002000000000000);
+
+		uint64_t *outHash = (uint64_t *)outputHash + (size_t)8 * thread;
+
+#pragma unroll 8
+		for (int i = 0; i < 8; i++) {
+			outHash[i] = state[i];
+		}
+	} // thread < threads
+}
+
+/*
+ * M7 variant: processes the second and third 64-byte blocks of the padded
+ * message, one nonce per thread.
+ *
+ * The midstate of the first block is read from `stateo`; words 8..15 of
+ * c_PaddedMessage80 form the second block, with the nonce written into 32-bit
+ * word 13 of that block. The third block consists of the constant
+ * 0xd003000000000000 in word 7 and zeros elsewhere.
+ *
+ * Output layout: word i of thread t is stored at outputHash[i * threads + t].
+ *
+ * Launch: 1-D grid and block, at most 512 threads per block
+ * (__launch_bounds__), and at least 256 so that the shared table is filled.
+ */
+__global__ void __launch_bounds__(512,2) m7_whirlpool512_gpu_hash_120(int threads, uint32_t startNounce, uint64_t *outputHash)
+{
+	__shared__ uint64_t sharedMemory[2048];
+
+	if (threadIdx.x < 256)
+	{
+		const unsigned t = threadIdx.x;
+		sharedMemory[t]        = T0[t];
+		sharedMemory[t + 256]  = T1[t];
+		sharedMemory[t + 512]  = T2[t];
+		sharedMemory[t + 768]  = T3[t];
+		sharedMemory[t + 1024] = T4[t];
+		sharedMemory[t + 1280] = T5[t];
+		sharedMemory[t + 1536] = T6[t];
+		sharedMemory[t + 1792] = T7[t];
+	}
+
+	__syncthreads();
+
+	const int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	if (thread >= threads)
+		return;   // no barrier follows, so leaving early is safe
+
+	const uint32_t nounce = startNounce + thread;
+
+	uint64_t block[8];   // second message block including this thread's nonce
+	uint64_t state[8];
+	uint64_t n[8];
+	uint64_t h[8];
+
+#pragma unroll 8
+	for (int i = 0; i < 8; i++) {
+		block[i] = c_PaddedMessage80[8 + i];
+	}
+	((uint32_t*)block)[13] = nounce;   // overwrites the second 32-bit half of block[6]
+
+	//// round 2
+#pragma unroll 8
+	for (int i = 0; i < 8; i++) {
+		h[i] = stateo[i];
+		n[i] = xor1(block[i], h[i]);
+	}
+
+	whirlpool_device_round(sharedMemory, n, h);
+
+#pragma unroll 8
+	for (int i = 0; i < 8; i++) {
+		state[i] = xor3(stateo[i], n[i], block[i]);
+	}
+
+	//// round 3
+	const uint64_t lastWord = 0xd003000000000000;
+
+#pragma unroll 8
+	for (int i = 0; i < 8; i++) {
+		h[i] = state[i];
+		n[i] = state[i];
+	}
+	n[7] = xor1(state[7], lastWord);
+
+	whirlpool_device_round(sharedMemory, n, h);
+
+#pragma unroll 7
+	for (int i = 0; i < 7; i++) {
+		state[i] = xor1(state[i], n[i]);
+	}
+	state[7] = xor3(state[7], n[7], lastWord);
+
+#pragma unroll 8
+	for (int i = 0; i < 8; i++) {
+		outputHash[i*threads+thread] = state[i];
+	}
+}
+
+
+/*
+ * Hashes a 64-byte input in place, one hash per thread.
+ *
+ * The nonce is taken from g_nonceVector[thread] when g_nonceVector is not
+ * NULL, otherwise it is startNounce + thread. The input is read from, and the
+ * result written back to, the eight 64-bit words at
+ * g_hash + 8 * (nonce - startNounce).
+ *
+ * Two blocks are processed: the 64 input bytes with an all-zero initial
+ * state, then a block with 0x80 in word 0, 0x2000000000000 in word 7 and
+ * zeros in between.
+ *
+ * Launch: 1-D grid and block. Threads 0..255 of each block fill the shared
+ * lookup table, so blockDim.x must be at least 256 (the host wrapper uses 512).
+ */
+__global__ void whirlpool512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector)
+{
+	__shared__ uint64_t sharedMemory[2048];
+	if (threadIdx.x < 256)
+	{
+		sharedMemory[threadIdx.x]      = T0[threadIdx.x];
+		sharedMemory[threadIdx.x+256]  = T1[threadIdx.x];
+		sharedMemory[threadIdx.x+512]  = T2[threadIdx.x];
+		sharedMemory[threadIdx.x+768]  = T3[threadIdx.x];
+		sharedMemory[threadIdx.x+1024] = T4[threadIdx.x];
+		sharedMemory[threadIdx.x+1280] = T5[threadIdx.x];
+		sharedMemory[threadIdx.x+1536] = T6[threadIdx.x];
+		sharedMemory[threadIdx.x+1792] = T7[threadIdx.x];
+	}
+
+	// The table is written by threads 0..255 but read by every thread of the
+	// block, so a block-wide barrier is required before the first lookup.
+	// It sits outside any divergent branch so that all threads reach it.
+	__syncthreads();
+
+	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	if (thread < threads)
+	{
+		uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread);
+
+		int hashPosition = nounce - startNounce;
+		uint64_t *inpHash = (uint64_t*)g_hash + 8*hashPosition;
+
+		uint64_t state[8];
+		uint64_t n[8];
+		uint64_t h[8];
+
+		// First block: the 64 input bytes, initial state all zero.
+#pragma unroll 8
+		for (int i = 0; i < 8; i++) {
+			n[i] = inpHash[i];
+			h[i] = 0;
+		}
+
+		whirlpool_device_round(sharedMemory, n, h);
+
+#pragma unroll 8
+		for (int i = 0; i < 8; i++) {
+			state[i] = xor1(n[i], inpHash[i]);
+		}
+
+		// Second block: 0x80 in word 0, 0x2000000000000 in word 7, zeros in
+		// between, XORed with the current state.
+#pragma unroll 8
+		for (int i = 0; i < 8; i++) {
+			h[i] = state[i];
+		}
+		n[0] = xor1((uint64_t)0x80, h[0]);
+#pragma unroll 6
+		for (int i = 1; i < 7; i++) {
+			n[i] = h[i];
+		}
+		n[7] = xor1((uint64_t)0x2000000000000, h[7]);
+
+		whirlpool_device_round(sharedMemory, n, h);
+
+		state[0] = xor3(state[0], n[0], 0x80);
+		state[1] = xor1(state[1], n[1]);
+		state[2] = xor1(state[2], n[2]);
+		state[3] = xor1(state[3], n[3]);
+		state[4] = xor1(state[4], n[4]);
+		state[5] = xor1(state[5], n[5]);
+		state[6] = xor1(state[6], n[6]);
+		state[7] = xor3(state[7], n[7], 0x2000000000000);
+
+#pragma unroll 8
+		for (unsigned i = 0; i < 8; i++) {
+			inpHash[i] = state[i];
+		}
+	}
+}
+   
+/*
+ * Final 64-byte hash with target test, one hash per thread.
+ *
+ * Same input convention as whirlpool512_gpu_hash_64 (nonce from
+ * g_nonceVector[thread] or startNounce + thread; input read from
+ * g_hash + 8 * (nonce - startNounce)), but the hash is not written back.
+ * Only 64-bit word 3 of the result is computed and compared with word 3 of
+ * pTarget; when it is less than or equal, the nonce is merged into
+ * resNounce[0] so that resNounce[0] ends up holding the smallest qualifying
+ * nonce. resNounce[0] must be initialised to 0xffffffff by the caller.
+ *
+ * Launch: 1-D grid and block. Threads 0..255 of each block fill the shared
+ * lookup table, so blockDim.x must be at least 256 (the host wrapper uses 512).
+ */
+ __global__ void whirlpool512_gpu_finalhash_64(int threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector, uint32_t *resNounce)
+{
+	__shared__ uint64_t sharedMemory[2048];
+	if (threadIdx.x < 256)
+	{
+		sharedMemory[threadIdx.x]      = T0[threadIdx.x];
+		sharedMemory[threadIdx.x+256]  = T1[threadIdx.x];
+		sharedMemory[threadIdx.x+512]  = T2[threadIdx.x];
+		sharedMemory[threadIdx.x+768]  = T3[threadIdx.x];
+		sharedMemory[threadIdx.x+1024] = T4[threadIdx.x];
+		sharedMemory[threadIdx.x+1280] = T5[threadIdx.x];
+		sharedMemory[threadIdx.x+1536] = T6[threadIdx.x];
+		sharedMemory[threadIdx.x+1792] = T7[threadIdx.x];
+	}
+
+	// The table is written by threads 0..255 but read by every thread of the
+	// block, so a block-wide barrier is required before the first lookup.
+	// It sits outside any divergent branch so that all threads reach it.
+	__syncthreads();
+
+	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	if (thread < threads)
+	{
+		uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread);
+
+		int hashPosition = nounce - startNounce;
+		uint64_t *inpHash = (uint64_t*)g_hash + 8 * hashPosition;
+
+		uint64_t state[8];
+		uint64_t n[8];
+		uint64_t h[8];
+
+		// First block: the 64 input bytes, initial state all zero.
+#pragma unroll 8
+		for (int i = 0; i < 8; i++) {
+			n[i] = inpHash[i];
+			h[i] = 0;
+		}
+
+		whirlpool_device_round(sharedMemory, n, h);
+
+#pragma unroll 8
+		for (int i = 0; i < 8; i++) {
+			state[i] = xor1(n[i], inpHash[i]);
+		}
+
+		// Second block: 0x80 in word 0, 0x2000000000000 in word 7, zeros in
+		// between, XORed with the current state.
+#pragma unroll 8
+		for (int i = 0; i < 8; i++) {
+			h[i] = state[i];
+		}
+		n[0] = xor1((uint64_t)0x80, h[0]);
+#pragma unroll 6
+		for (int i = 1; i < 7; i++) {
+			n[i] = h[i];
+		}
+		n[7] = xor1((uint64_t)0x2000000000000, h[7]);
+
+		// Only word 3 is needed for the comparison, so the shortened round
+		// function is used. The padding block contributes nothing to word 3.
+		whirlpool_device_finalround(sharedMemory, n, h);
+		state[3] = xor1(state[3], n[3]);
+
+		if (state[3] <= ((uint64_t*)pTarget)[3])
+		{
+			// Several threads may qualify at once; an atomic minimum keeps
+			// the smallest nonce without a read-modify-write race.
+			atomicMin(resNounce, nounce);
+		}
+	}
+}
+
+/*
+ * Uploads the lookup tables and round constants to the device symbols
+ * T0..T7 and InitVector_RC, then allocates the nonce result buffers for
+ * thr_id (d_WNonce with cudaMalloc, d_wnounce with cudaMallocHost).
+ *
+ * flag == 1 selects the old1_* tables and constants; any other value selects
+ * the plain_* ones. `threads` is not used.
+ */
+void whirlpool512_cpu_init(int thr_id, int threads, int flag)
+{
+	const bool useOld = (flag == 1);
+
+	// Copies whichever of the two host tables is selected, with its own size.
+#define WHIRLPOOL_UPLOAD_TABLE(sym, oldTab, newTab) \
+	cudaMemcpyToSymbol(sym, \
+		useOld ? (const void*)(oldTab) : (const void*)(newTab), \
+		useOld ? sizeof(oldTab) : sizeof(newTab), \
+		0, cudaMemcpyHostToDevice)
+
+	WHIRLPOOL_UPLOAD_TABLE(T0, old1_T0, plain_T0);
+	WHIRLPOOL_UPLOAD_TABLE(T1, old1_T1, plain_T1);
+	WHIRLPOOL_UPLOAD_TABLE(T2, old1_T2, plain_T2);
+	WHIRLPOOL_UPLOAD_TABLE(T3, old1_T3, plain_T3);
+	WHIRLPOOL_UPLOAD_TABLE(T4, old1_T4, plain_T4);
+	WHIRLPOOL_UPLOAD_TABLE(T5, old1_T5, plain_T5);
+	WHIRLPOOL_UPLOAD_TABLE(T6, old1_T6, plain_T6);
+	WHIRLPOOL_UPLOAD_TABLE(T7, old1_T7, plain_T7);
+
+#undef WHIRLPOOL_UPLOAD_TABLE
+
+	// Both constant sets are copied with the size of plain_RC.
+	cudaMemcpyToSymbol(InitVector_RC,
+		useOld ? (const void*)old1_RC : (const void*)plain_RC,
+		sizeof(plain_RC), 0, cudaMemcpyHostToDevice);
+
+	cudaMalloc(&d_WNonce[thr_id], sizeof(uint32_t));
+	cudaMallocHost(&d_wnounce[thr_id], 1*sizeof(uint32_t));
+}
+
+
+/*
+ * Launches whirlpool512_gpu_hash_64 for `threads` hashes on the default
+ * stream and then calls MyStreamSynchronize(NULL, order, thr_id).
+ * d_hash and d_nonceVector are passed through to the kernel.
+ */
+__host__ void whirlpool512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order)
+{
+	// Alignment with mixtob size. DO NOT CHANGE.
+	const int threadsperblock = 512;
+
+	// Number of thread blocks needed to cover all hashes (rounded up).
+	const int blocks = (threads + threadsperblock - 1) / threadsperblock;
+	const dim3 grid(blocks);
+	const dim3 block(threadsperblock);
+
+	// No dynamic shared memory; the kernel declares its table statically.
+	whirlpool512_gpu_hash_64<<<grid, block, 0>>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector);
+
+	MyStreamSynchronize(NULL, order, thr_id);
+}
+
+/*
+ * Launches whirlpool512_gpu_finalhash_64 for `threads` hashes and returns the
+ * nonce reported by the kernel, or 0xffffffff when no hash met the target.
+ *
+ * The device result word d_WNonce[thr_id] is preset to 0xffffffff, the kernel
+ * is run on the default stream, and the word is copied back through the
+ * host buffer d_wnounce[thr_id].
+ */
+__host__ uint32_t whirlpool512_cpu_finalhash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order)
+{
+	uint32_t result = 0xffffffff;
+	cudaMemset(d_WNonce[thr_id], 0xff, sizeof(uint32_t));
+	const int threadsperblock = 512; // maximize occupancy
+
+	// Round the block count up: a truncating division would leave the last
+	// (threads % threadsperblock) hashes untested and would request an empty
+	// grid when threads < threadsperblock. The kernel guards thread < threads,
+	// so the surplus threads of the last block do no work.
+	dim3 grid((threads + threadsperblock - 1) / threadsperblock);
+	dim3 block(threadsperblock);
+
+	size_t shared_size = 0;
+
+	whirlpool512_gpu_finalhash_64<<<grid, block, shared_size>>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector, d_WNonce[thr_id]);
+
+	MyStreamSynchronize(NULL, order, thr_id);
+	// Blocking copy, so the value is valid as soon as the call returns.
+	cudaMemcpy(d_wnounce[thr_id], d_WNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost);
+	result = *d_wnounce[thr_id];
+
+	return result;
+}
+
+/*
+ * Launches whirlpool512_gpu_hash_80 for `threads` nonces starting at
+ * startNounce on the default stream, then calls
+ * MyStreamSynchronize(NULL, order, thr_id). d_outputHash is passed through
+ * to the kernel as its output buffer.
+ */
+__host__ void whirlpool512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_outputHash, int order)
+{
+	const int threadsperblock = 512;
+
+	// Enough blocks to cover every nonce (rounded up).
+	const int blocks = (threads + threadsperblock - 1) / threadsperblock;
+	const dim3 grid(blocks);
+	const dim3 block(threadsperblock);
+
+	// No dynamic shared memory is requested.
+	whirlpool512_gpu_hash_80<<<grid, block, 0>>>(threads, startNounce, d_outputHash);
+
+	MyStreamSynchronize(NULL, order, thr_id);
+}
+
+/*
+ * Launches m7_whirlpool512_gpu_hash_120 for `threads` nonces starting at
+ * startNounce on the default stream, then calls
+ * MyStreamSynchronize(NULL, order, thr_id).
+ *
+ * The kernel stores word i of thread t at d_outputHash[i * threads + t], so
+ * d_outputHash must provide room for 8 * threads 64-bit words.
+ */
+__host__ void m7_whirlpool512_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order)
+{
+	const int threadsperblock = 512; // matches the kernel's __launch_bounds__(512,2)
+	dim3 block(threadsperblock);
+
+	// Round the block count up: a truncating division would skip the last
+	// (threads % threadsperblock) nonces and would request an empty grid when
+	// threads < threadsperblock. The kernel guards thread < threads.
+	dim3 grid((threads + threadsperblock - 1) / threadsperblock);
+
+	size_t shared_size = 0;
+	m7_whirlpool512_gpu_hash_120<<<grid, block, shared_size>>>(threads, startNounce, d_outputHash);
+
+	MyStreamSynchronize(NULL, order, thr_id);
+}
+
+
+/*
+ * Prepares the device constants for hashing an 80-byte input.
+ *
+ * - pTarget receives 8 32-bit words from ptarget.
+ * - c_PaddedMessage80 receives the 80 bytes of pdata followed by a 0x80 byte
+ *   and zeros up to 128 bytes.
+ * - stateo receives the midstate of the first 64 bytes of pdata, computed on
+ *   the host with whirlpool_round_old() from an all-zero initial state.
+ *
+ * pdata must be readable for 80 bytes and is accessed as 64-bit words.
+ */
+__host__ void whirlpool512_setBlock_80(void *pdata, const void *ptarget)
+{
+	unsigned char PaddedMessage[128];
+
+	memcpy(PaddedMessage, pdata, 80);
+	PaddedMessage[80] = 0x80;
+	memset(PaddedMessage + 81, 0, sizeof(PaddedMessage) - 81);
+
+	cudaMemcpyToSymbol( pTarget, ptarget, 8*sizeof(uint32_t), 0, cudaMemcpyHostToDevice);
+	cudaMemcpyToSymbol( c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice);
+
+	// Midstate of the first block: rounds(block) ^ block, key starting at zero.
+	const uint64_t* firstBlock = (const uint64_t*) pdata;
+	uint64_t midstate[8];
+	uint64_t msg[8];
+	uint64_t key[8];
+
+	for (int i = 0; i < 8; i++) {
+		key[i] = 0;
+		msg[i] = firstBlock[i];
+	}
+
+	whirlpool_round_old(msg, key);
+
+	for (int i = 0; i < 8; i++) {
+		midstate[i] = msg[i] ^ firstBlock[i];
+	}
+
+	cudaMemcpyToSymbol( stateo, midstate, 8*sizeof(uint64_t), 0, cudaMemcpyHostToDevice);
+}
+
+/*
+ * Prepares the device constants for the M7 kernel.
+ *
+ * - c_PaddedMessage80 receives the first 122 bytes of pdata followed by a
+ *   0x80 byte and five zero bytes (128 bytes in total). The trailing zeros
+ *   are needed: the kernel reads word 15 of this buffer, which contains them.
+ * - stateo receives the midstate of the first 64 bytes of pdata, computed on
+ *   the host with whirlpool_round() from an all-zero initial state.
+ *
+ * pdata must be readable for 122 bytes and is accessed as 64-bit words.
+ */
+__host__ void whirlpool512_setBlock_120(void *pdata)
+{
+	unsigned char PaddedMessage[128];
+
+	memcpy(PaddedMessage, pdata, 122);
+	PaddedMessage[122] = 0x80;
+	memset(PaddedMessage + 123, 0, sizeof(PaddedMessage) - 123);
+
+	cudaMemcpyToSymbol( c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice);
+
+	// Midstate of the first block: rounds(block) ^ block, key starting at zero.
+	const uint64_t* firstBlock = (const uint64_t*) pdata;
+	uint64_t midstate[8];
+	uint64_t msg[8];
+	uint64_t key[8];
+
+	for (int i = 0; i < 8; i++) {
+		key[i] = 0;
+		msg[i] = firstBlock[i];
+	}
+
+	whirlpool_round(msg, key);
+
+	for (int i = 0; i < 8; i++) {
+		midstate[i] = msg[i] ^ firstBlock[i];
+	}
+
+	cudaMemcpyToSymbol( stateo, midstate, 8*sizeof(uint64_t), 0, cudaMemcpyHostToDevice);
+}
\ No newline at end of file
diff --git a/x13/cuda_x13_hamsi512.cu b/x13/cuda_x13_hamsi512.cu
index a9039a9d74..e67e6afe63 100644
--- a/x13/cuda_x13_hamsi512.cu
+++ b/x13/cuda_x13_hamsi512.cu
@@ -37,32 +37,36 @@
  * @author   phm <phm@inbox.com>
  */
 
+#include <cuda.h>
+#include "cuda_runtime.h"
+#include "device_launch_parameters.h"
+
+
+#include <stdio.h>
+#include <stdint.h>
+#include <memory.h>
 // aus heavy.cu
 extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
 
-typedef unsigned char uint8_t;
-typedef unsigned short uint16_t;
-typedef unsigned int uint32_t;
-typedef unsigned long long uint64_t;
+typedef unsigned char BitSequence;
+
+
+#include "cuda_helper.h"
 
 #define SPH_C64(x)    ((uint64_t)(x ## ULL))
 #define SPH_C32(x)    ((uint32_t)(x ## U))
 #define SPH_T32(x)    ((x) & SPH_C32(0xFFFFFFFF))
 
-#define SWAB32(x) ( __byte_perm(x, x, 0x0123) )
+#define SWAB32(x)      cuda_swab32(x)
+#define ROTL32(x,n)    SPH_ROTL32(x,n) 
 
-#if __CUDA_ARCH__ < 350 
-    // Kepler (Compute 3.0)
-    #define ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n))))
-#else
-    // Kepler (Compute 3.5)
-    #define ROTL32(x, n) __funnelshift_l( (x), (x), (n) )
-#endif
 
 static __constant__ uint32_t d_alpha_n[32];
 static __constant__ uint32_t d_alpha_f[32];
 static __constant__ uint32_t d_T512[64][16];
 
+
+
 static const uint32_t alpha_n[] = {
 	SPH_C32(0xff00f0f0), SPH_C32(0xccccaaaa), SPH_C32(0xf0f0cccc),
 	SPH_C32(0xff00aaaa), SPH_C32(0xccccaaaa), SPH_C32(0xf0f0ff00),
diff --git a/x13/fresh.cu b/x13/fresh.cu
new file mode 100644
index 0000000000..8016f37ab1
--- /dev/null
+++ b/x13/fresh.cu
@@ -0,0 +1,166 @@
+/*
+ * fresh algorithm built on cbuchner1's original X11
+ * 
+ */
+
+extern "C"
+{
+#include "sph/sph_shavite.h"
+#include "sph/sph_simd.h"
+#include "sph/sph_echo.h"
+
+#include "miner.h"
+}
+
+// aus cpu-miner.c
+extern int device_map[8];
+
+// Memory for input/output of the chained hash functions
+static uint32_t *d_hash[8];
+
+extern void x11_shavite512_cpu_init(int thr_id, int threads);
+extern void x11_shavite512_setBlock_80(void *pdata);
+extern void x11_shavite512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
+extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x11_simd512_cpu_init(int thr_id, int threads);
+extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x11_echo512_cpu_init(int thr_id, int threads);
+extern void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void quark_check_cpu_init(int thr_id, int threads);
+extern void quark_check_cpu_setTarget(const void *ptarget);
+
+extern uint32_t quark_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
+
+extern void quark_compactTest_cpu_init(int thr_id, int threads);
+extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, 
+											uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse,
+											int order);
+
+/*
+ * CPU reference for the "fresh" hash; scanhash_fresh uses it to re-check GPU hits.
+ * Chain: shavite512 -> simd512 -> shavite512 -> simd512 -> echo512.
+ * Reads 80 bytes from input and copies the first 32 bytes of the result to state.
+ */
+inline void fresh_hash(void *state, const void *input)
+{
+	sph_shavite512_context shavite;
+	sph_simd512_context simd;
+	sph_echo512_context echo;
+
+	// scratch digest, rewritten in place by every stage
+	uint32_t digest[16];
+
+	// round 0 hashes the 80-byte input, round 1 hashes the 64-byte digest
+	// left behind by round 0
+	const void *msg = input;
+	size_t msg_len = 80;
+
+	for (int round = 0; round < 2; round++)
+	{
+		sph_shavite512_init(&shavite);
+		sph_shavite512(&shavite, msg, msg_len);
+		sph_shavite512_close(&shavite, digest);
+
+		sph_simd512_init(&simd);
+		sph_simd512(&simd, digest, 64);
+		sph_simd512_close(&simd, digest);
+
+		msg = digest;
+		msg_len = 64;
+	}
+
+	sph_echo512_init(&echo);
+	sph_echo512(&echo, digest, 64);
+	sph_echo512_close(&echo, digest);
+
+	memcpy(state, digest, 32);
+}
+
+
+extern bool opt_benchmark;
+
+/* Scans nonces upward from pdata[19]; returns 1 with the winning nonce stored in
+ * pdata[19] once a GPU hit also validates on the CPU, or 0 when max_nonce is
+ * reached or a work restart is requested. *hashes_done is set on every return. */
+extern "C" int scanhash_fresh(int thr_id, uint32_t *pdata,
+    const uint32_t *ptarget, uint32_t max_nonce,
+    unsigned long *hashes_done)
+{
+	const uint32_t first_nonce = pdata[19];
+
+	// benchmark mode: overwrite the top target word with a fixed value
+	if (opt_benchmark)
+		((uint32_t*)ptarget)[7] = 0x0000ff;
+	const uint32_t Htarg = ptarget[7];
+	const int throughput = 256*256*8;
+
+	// one-time setup per thr_id
+	static bool init[8] = {0,0,0,0,0,0,0,0};
+	if (!init[thr_id])
+	{
+		cudaSetDevice(device_map[thr_id]);
+
+		// copy constants, allocate memory
+		cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
+		x11_shavite512_cpu_init(thr_id, throughput);
+		x11_simd512_cpu_init(thr_id, throughput);
+		x11_echo512_cpu_init(thr_id, throughput);
+		quark_check_cpu_init(thr_id, throughput);
+		init[thr_id] = true;
+	}
+
+	// be32enc-converted copy of the 20 words of pdata
+	uint32_t endiandata[20];
+	for (int k=0; k < 20; k++)
+		be32enc(&endiandata[k], ((uint32_t*)pdata)[k]);
+
+	x11_shavite512_setBlock_80((void*)endiandata);
+	quark_check_cpu_setTarget(ptarget);
+	do {
+		int order = 0;
+		// Shavite512
+		x11_shavite512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
+
+		// SIMD512
+		x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+
+		// Shavite512
+		x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+
+		// SIMD512
+		x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+
+		// echo
+		x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+
+		// scan for winning hashes on the GPU
+		uint32_t foundNonce = quark_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		if  (foundNonce != 0xffffffff)
+		{
+			uint32_t vhash64[8];
+			be32enc(&endiandata[19], foundNonce);
+			fresh_hash(vhash64, endiandata);
+			if( (vhash64[7]<=Htarg) && fulltest(vhash64, ptarget) ) {
+				pdata[19] = foundNonce;
+				*hashes_done = foundNonce - first_nonce + 1;
+				return 1;
+			} else {
+				applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU! vhash64 %08x and htarg %08x", thr_id, foundNonce,vhash64[7],Htarg);
+			}
+		}
+
+		// Advance in 64-bit arithmetic: a plain 32-bit add wraps near the top of
+		// the nonce space and would restart the scan from a low nonce.
+		const uint64_t next_nonce = (uint64_t) pdata[19] + throughput;
+		if (next_nonce > 0xffffffffULL) {
+			pdata[19] = max_nonce;
+			break;
+		}
+		pdata[19] = (uint32_t) next_nonce;
+	} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
+	*hashes_done = pdata[19] - first_nonce + 1;
+	return 0;
+}
diff --git a/x13/goalcoin.cu b/x13/goalcoin.cu
new file mode 100644
index 0000000000..c89df37678
--- /dev/null
+++ b/x13/goalcoin.cu
@@ -0,0 +1,222 @@
+/*
+ * Goalcoin
+ * 
+ */
+
+extern "C"
+{
+#include "sph/sph_blake.h"
+#include "sph/sph_bmw.h"
+#include "sph/sph_groestl.h"
+#include "sph/sph_skein.h"
+#include "sph/sph_jh.h"
+#include "sph/sph_keccak.h"
+
+#include "sph/sph_luffa.h"
+#include "sph/sph_cubehash.h"
+#include "sph/sph_shavite.h"
+#include "sph/sph_simd.h"
+#include "sph/sph_echo.h"
+
+#include "sph/sph_hamsi.h"
+#include "sph/sph_fugue.h"
+
+#include "sph/sph_shabal.h"
+#include "sph/sph_whirlpool.h"
+
+#include "miner.h"
+}
+
+// aus cpu-miner.c
+extern int device_map[8];
+
+// Memory for input/output of the chained hash functions
+static uint32_t *d_hash[8];
+
+extern void quark_blake512_cpu_init(int thr_id, int threads);
+extern void quark_blake512_cpu_setBlock_80(void *pdata);
+extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
+extern void quark_blake512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void quark_bmw512_cpu_init(int thr_id, int threads);
+extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void quark_groestl512_cpu_init(int thr_id, int threads);
+extern void quark_groestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+//extern void quark_doublegroestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void quark_skein512_cpu_init(int thr_id, int threads);
+extern void quark_skein512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void quark_keccak512_cpu_init(int thr_id, int threads);
+extern void quark_keccak512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void quark_jh512_cpu_init(int thr_id, int threads);
+extern void quark_jh512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x11_luffa512_cpu_init(int thr_id, int threads);
+extern void x11_luffa512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x11_cubehash512_cpu_init(int thr_id, int threads);
+extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x11_shavite512_cpu_init(int thr_id, int threads);
+extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x11_simd512_cpu_init(int thr_id, int threads);
+extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x11_echo512_cpu_init(int thr_id, int threads);
+extern void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x13_hamsi512_cpu_init(int thr_id, int threads);
+extern void x13_hamsi512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x13_fugue512_cpu_init(int thr_id, int threads);
+extern void x13_fugue512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x13_shabal512_cpu_init(int thr_id, int threads);
+extern void x13_shabal512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void whirlpool512_cpu_init(int thr_id, int threads,int flag);
+extern void whirlpool512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+extern uint32_t whirlpool512_cpu_finalhash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+extern void whirlpool512_setBlock_80(void *pdata, const void *ptarget);
+
+
+
+/*
+ * CPU reference for the goalcoin hash; scanhash_goal uses it to re-check GPU hits.
+ * Chain: blake512 -> groestl512 -> jh512 -> keccak512 -> skein512 -> whirlpool.
+ * Reads 80 bytes from input and copies the first 32 bytes of the result to state.
+ */
+inline void goalhash(void *state, const void *input)
+{
+	// scratch digest, written by blake and rewritten in place by later stages
+	uint32_t digest[16];
+
+	// stage 1: blake512 over the 80-byte input
+	sph_blake512_context blake;
+	sph_blake512_init(&blake);
+	sph_blake512(&blake, input, 80);
+	sph_blake512_close(&blake, digest);
+
+	// stage 2: groestl512
+	sph_groestl512_context groestl;
+	sph_groestl512_init(&groestl);
+	sph_groestl512(&groestl, digest, 64);
+	sph_groestl512_close(&groestl, digest);
+
+	// stage 3: jh512
+	sph_jh512_context jh;
+	sph_jh512_init(&jh);
+	sph_jh512(&jh, digest, 64);
+	sph_jh512_close(&jh, digest);
+
+	// stage 4: keccak512
+	sph_keccak512_context keccak;
+	sph_keccak512_init(&keccak);
+	sph_keccak512(&keccak, digest, 64);
+	sph_keccak512_close(&keccak, digest);
+
+	// stage 5: skein512
+	sph_skein512_context skein;
+	sph_skein512_init(&skein);
+	sph_skein512(&skein, digest, 64);
+	sph_skein512_close(&skein, digest);
+
+	// stage 6: whirlpool
+	sph_whirlpool_context whirlpool;
+	sph_whirlpool_init(&whirlpool);
+	sph_whirlpool(&whirlpool, digest, 64);
+	sph_whirlpool_close(&whirlpool, digest);
+
+	// only the leading 32 bytes are handed back to the caller
+	memcpy(state, digest, 32);
+}
+
+
+extern bool opt_benchmark;
+
+/* Scans nonces upward from pdata[19]; returns 1 with the winning nonce stored in
+ * pdata[19] once a GPU hit also validates on the CPU, or 0 when max_nonce is
+ * reached or a work restart is requested. *hashes_done is set on every return. */
+extern "C" int scanhash_goal(int thr_id, uint32_t *pdata,
+    const uint32_t *ptarget, uint32_t max_nonce,
+    unsigned long *hashes_done)
+{
+	const uint32_t first_nonce = pdata[19];
+
+	// benchmark mode: overwrite the top target word with a fixed value
+	if (opt_benchmark)
+		((uint32_t*)ptarget)[7] = 0x0000ff;
+	const uint32_t Htarg = ptarget[7];
+	const int throughput = 256*256*8;
+
+	// one-time setup per thr_id
+	static bool init[8] = {0,0,0,0,0,0,0,0};
+	if (!init[thr_id])
+	{
+		cudaSetDevice(device_map[thr_id]);
+
+		// copy constants, allocate memory
+		cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
+		quark_blake512_cpu_init(thr_id, throughput);
+		quark_groestl512_cpu_init(thr_id, throughput);
+		quark_skein512_cpu_init(thr_id, throughput);
+		quark_keccak512_cpu_init(thr_id, throughput);
+		quark_jh512_cpu_init(thr_id, throughput);
+		whirlpool512_cpu_init(thr_id, throughput,0);
+		init[thr_id] = true;
+	}
+
+	// be32enc-converted copy of the 20 words of pdata
+	uint32_t endiandata[20];
+	for (int k=0; k < 20; k++)
+		be32enc(&endiandata[k], ((uint32_t*)pdata)[k]);
+
+	quark_blake512_cpu_setBlock_80((void*)endiandata);
+	whirlpool512_setBlock_80((void*)endiandata, ptarget);
+
+	do {
+		int order = 0;
+		// first Blake512 hash with CUDA
+		quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
+		// this is the unconditional branch for Groestl512
+		quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		// this is the unconditional branch for JH512
+		quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		// this is the unconditional branch for Keccak512
+		quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		// this is the unconditional branch for Skein512
+		quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+
+		// scan for winning hashes on the GPU
+		uint32_t foundNonce = whirlpool512_cpu_finalhash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		if  (foundNonce != 0xffffffff)
+		{
+			uint32_t vhash64[8];
+			be32enc(&endiandata[19], foundNonce);
+			goalhash(vhash64, endiandata);
+			if( (vhash64[7]<=Htarg) && fulltest(vhash64, ptarget) ) {
+				pdata[19] = foundNonce;
+				*hashes_done = foundNonce - first_nonce + 1;
+				return 1;
+			} else {
+				applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNonce);
+			}
+		}
+
+		// Advance in 64-bit arithmetic: a plain 32-bit add wraps near the top of
+		// the nonce space and would restart the scan from a low nonce.
+		const uint64_t next_nonce = (uint64_t) pdata[19] + throughput;
+		if (next_nonce > 0xffffffffULL) {
+			pdata[19] = max_nonce;
+			break;
+		}
+		pdata[19] = (uint32_t) next_nonce;
+	} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
+
+	*hashes_done = pdata[19] - first_nonce + 1;
+	return 0;
+}
diff --git a/x13/m7.cu b/x13/m7.cu
new file mode 100644
index 0000000000..c116011394
--- /dev/null
+++ b/x13/m7.cu
@@ -0,0 +1,341 @@
+/*
+ * m7 algorithm 
+ * 
+ */
+
+extern "C"
+{
+#include "sph/sph_sha2.h"
+#include "sph/sph_keccak.h"
+#include "sph/sph_ripemd.h"
+#include "sph/sph_haval.h"
+#include "sph/sph_tiger.h"
+#include "sph/sph_whirlpool.h"
+#include "sph/sph_blake.h"
+#include "miner.h"
+}
+//#include "mpir.h"
+
+extern int device_map[8];
+
+
+static uint64_t *d_hash[8];
+static uint64_t *KeccakH[8];
+static uint64_t *Sha512H[8];
+static uint64_t *d_prod0[8];
+static uint64_t *d_prod1[8];
+
+extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
+/*
+static void mpz_set_uint256(mpz_t r, uint8_t *u)
+{
+    mpz_import(r, 32 / sizeof(unsigned long), -1, sizeof(unsigned long), -1, 0, u);
+}
+
+static void mpz_get_uint256(mpz_t r, uint8_t *u)
+{
+    u=0;
+    mpz_export(u, 0, -1, sizeof(unsigned long), -1, 0, r);
+}
+
+static void mpz_set_uint512(mpz_t r, uint8_t *u)
+{
+    mpz_import(r, 64 / sizeof(unsigned long), -1, sizeof(unsigned long), -1, 0, u);
+}
+
+static void set_one_if_zero(uint8_t *hash512) {
+    for (int i = 0; i < 32; i++) {
+        if (hash512[i] != 0) {
+            return;
+        }
+    }
+    hash512[0] = 1;
+}
+*/
+//extern uint32_t m7_sha256_cpu_hash_300(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+extern uint32_t m7_sha256_cpu_hash_300(int thr_id, int threads, uint32_t startNounce, uint64_t *d_nonceVector, uint64_t *d_hash, int order);
+
+extern void m7_sha256_setBlock_120(void *data,const void *ptarget);
+extern void m7_sha256_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order);
+extern void m7_sha256_cpu_init(int thr_id, int threads);
+
+
+extern void sha512_cpu_init(int thr_id, int threads);
+extern void sha512_setBlock_120(void *pdata);
+extern void m7_sha512_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint64_t *d_hash, int order);
+
+extern void ripemd160_cpu_init(int thr_id, int threads);
+extern void ripemd160_setBlock_120(void *pdata);
+extern void m7_ripemd160_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint64_t *d_hash, int order);
+
+extern void tiger192_cpu_init(int thr_id, int threads);
+extern void tiger192_setBlock_120(void *pdata);
+extern void m7_tiger192_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint64_t *d_hash, int order);
+
+
+extern void m7_bigmul_init(int thr_id, int threads);
+extern void m7_bigmul_unroll1_cpu(int thr_id, int threads,uint64_t* Hash1, uint64_t* Hash2,uint64_t *finalHash,int order);
+extern void m7_bigmul_unroll2_cpu(int thr_id, int threads,uint64_t* Hash1, uint64_t* Hash2,uint64_t *finalHash,int order);
+
+extern void cpu_mul(int thr_id, int threads, uint32_t alegs, uint32_t blegs, uint64_t *g_a, uint64_t *g_b, uint64_t *g_p, int order);
+extern void cpu_mulT4(int thr_id, int threads, uint32_t alegs, uint32_t blegs, uint64_t *g_a, uint64_t *g_b, uint64_t *g_p, int order);
+extern void mul_init();
+
+	
+extern void m7_keccak512_setBlock_120(void *pdata);
+extern void m7_keccak512_cpu_hash(int thr_id, int threads, uint32_t startNounce, uint64_t *d_hash, int order);
+extern void m7_keccak512_cpu_init(int thr_id, int threads);
+
+extern void whirlpool512_cpu_init(int thr_id, int threads, int flag);
+extern void whirlpool512_setBlock_120(void *pdata);
+extern void m7_whirlpool512_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order);
+
+extern void haval256_cpu_init(int thr_id, int threads);
+extern void haval256_setBlock_120(void *data);
+extern void m7_haval256_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order);
+
+
+extern void quark_check_cpu_init(int thr_id, int threads);
+extern void quark_check_cpu_setTarget(const void *ptarget);
+extern uint32_t quark_check_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint64_t *d_inputHash, int order);
+
+
+
+// m7 Hashfunktion
+/*
+inline void m7_hash(void *state, const void *input,uint32_t TheNonce, int debug)
+{
+    // sha256(sha256*sha512*keccak512*ripemd160*haval*tiger1*whirlpool) good luck with that...
+	
+	char data_str[245], hash_str[65], target_str[65];
+    uint8_t *bdata = 0;
+    mpz_t bns[7];
+    mpz_t product;
+    int rc = 0;
+	
+    for(int i=0; i < 7; i++){
+        mpz_init(bns[i]);
+    }
+    mpz_init(product);
+	 
+
+	uint32_t data[32] ; 
+	uint32_t *data_p64 = data + (116 / sizeof(data[0]));
+	uint8_t bhash[7][64];
+	uint32_t hash[8];
+	memcpy(data,input,122);
+
+
+	int M7_MIDSTATE_LEN = 116;
+	for(int i=0; i < 7; i++){
+        mpz_init(bns[i]);
+    }
+
+    sph_sha256_context ctx_final_sha256;
+
+    sph_sha256_context ctx_sha256;
+    sph_sha512_context ctx_sha512;
+    sph_keccak512_context ctx_keccak;
+    sph_whirlpool_context ctx_whirlpool;
+    sph_haval256_5_context ctx_haval;
+    sph_tiger_context ctx_tiger;
+    sph_ripemd160_context ctx_ripemd;
+
+    sph_sha256_init(&ctx_sha256);
+    sph_sha256 (&ctx_sha256, data, M7_MIDSTATE_LEN);
+    
+    sph_sha512_init(&ctx_sha512);
+    sph_sha512 (&ctx_sha512, data, M7_MIDSTATE_LEN);
+    
+    sph_keccak512_init(&ctx_keccak);
+    sph_keccak512 (&ctx_keccak, data, M7_MIDSTATE_LEN);
+
+    sph_whirlpool_init(&ctx_whirlpool);
+    sph_whirlpool (&ctx_whirlpool, data, M7_MIDSTATE_LEN);
+    
+    sph_haval256_5_init(&ctx_haval);
+    sph_haval256_5 (&ctx_haval, data, M7_MIDSTATE_LEN);
+
+    sph_tiger_init(&ctx_tiger);
+    sph_tiger (&ctx_tiger, data, M7_MIDSTATE_LEN);
+
+    sph_ripemd160_init(&ctx_ripemd);
+    sph_ripemd160 (&ctx_ripemd, data, M7_MIDSTATE_LEN);
+
+    sph_sha256_context ctx2_sha256;
+    sph_sha512_context ctx2_sha512; 
+    sph_keccak512_context ctx2_keccak;
+    sph_whirlpool_context ctx2_whirlpool;
+    sph_haval256_5_context ctx2_haval;
+    sph_tiger_context ctx2_tiger;
+    sph_ripemd160_context ctx2_ripemd;
+
+        data[29] = TheNonce;
+
+        memset(bhash, 0, 7 * 64);
+
+        ctx2_sha256 = ctx_sha256;
+        sph_sha256 (&ctx2_sha256, data_p64, 122 - M7_MIDSTATE_LEN);
+        sph_sha256_close(&ctx2_sha256, (void*)(bhash[0]));
+
+        ctx2_sha512 = ctx_sha512;
+        sph_sha512 (&ctx2_sha512, data_p64, 122 - M7_MIDSTATE_LEN);
+        sph_sha512_close(&ctx2_sha512, (void*)(bhash[1]));
+        
+        ctx2_keccak = ctx_keccak;
+        sph_keccak512 (&ctx2_keccak, data_p64, 122 - M7_MIDSTATE_LEN);
+        sph_keccak512_close(&ctx2_keccak, (void*)(bhash[2]));
+
+        ctx2_whirlpool = ctx_whirlpool;
+        sph_whirlpool (&ctx2_whirlpool, data_p64, 122 - M7_MIDSTATE_LEN);
+        sph_whirlpool_close(&ctx2_whirlpool, (void*)(bhash[3]));
+        
+        ctx2_haval = ctx_haval;
+        sph_haval256_5 (&ctx2_haval, data_p64, 122 - M7_MIDSTATE_LEN);
+        sph_haval256_5_close(&ctx2_haval, (void*)(bhash[4]));
+
+        ctx2_tiger = ctx_tiger;
+        sph_tiger (&ctx2_tiger, data_p64, 122 - M7_MIDSTATE_LEN);
+        sph_tiger_close(&ctx2_tiger, (void*)(bhash[5]));
+
+        ctx2_ripemd = ctx_ripemd;
+        sph_ripemd160 (&ctx2_ripemd, data_p64, 122 - M7_MIDSTATE_LEN);
+        sph_ripemd160_close(&ctx2_ripemd, (void*)(bhash[6]));
+if (debug == 1) {
+		for (int i=0;i<16;i++) {applog(LOG_INFO,"sha256[%d]=%02x %02x %02x %02x sha512[%d]=%02x %02x %02x %02x keccak[%d]=%02x %02x %02x %02x whirlpool[2][%d]=%02x %02x %02x %02x haval[%d]=%02x %02x %02x %02x tiger[%d]=%02x %02x %02x %02x ripemd[%d]=%02x %02x %02x %02x\n",
+        i,bhash[0][4*i+3],bhash[0][4*i+2],bhash[0][4*i+1],bhash[0][4*i+0],
+        i,bhash[1][4*i+3],bhash[1][4*i+2],bhash[1][4*i+1],bhash[1][4*i+0],
+		i,bhash[2][4*i+3],bhash[2][4*i+2],bhash[2][4*i+1],bhash[2][4*i+0],
+		i,bhash[3][4*i+3],bhash[3][4*i+2],bhash[3][4*i+1],bhash[3][4*i+0],
+		i,bhash[4][4*i+3],bhash[4][4*i+2],bhash[4][4*i+1],bhash[4][4*i+0],
+		i,bhash[5][4*i+3],bhash[5][4*i+2],bhash[5][4*i+1],bhash[5][4*i+0],
+		i,bhash[6][4*i+3],bhash[6][4*i+2],bhash[6][4*i+1],bhash[6][4*i+0]
+	);}
+}
+        for(int i=0; i < 7; i++){
+            set_one_if_zero(bhash[i]);
+            mpz_set_uint512(bns[i],bhash[i]);
+        }
+        
+        for(int i=6; i > 0; i--){
+            mpz_mul(bns[i-1], bns[i-1], bns[i]);
+        }
+
+        int bytes = mpz_sizeinbase(bns[0], 256);
+        bdata = (uint8_t *)realloc(bdata, bytes);
+        mpz_export((void *)bdata, NULL, -1, 1, 0, 0, bns[0]);
+       sph_sha256_init(&ctx_final_sha256);
+        sph_sha256 (&ctx_final_sha256, bdata, bytes);
+        sph_sha256_close(&ctx_final_sha256, (void*)(hash));
+
+    memcpy(state, hash, 32);
+}
+*/
+extern float tp_coef[8];
+extern bool opt_benchmark;
+
+
+/* Scans nonces upward from pdata[29]; returns 1 with the GPU-reported nonce in
+ * pdata[29], or 0 once max_nonce is reached or a work restart is requested.
+ * GPU hits are returned without a CPU re-check (m7_hash is commented out). */
+extern "C" int scanhash_m7(int thr_id, uint32_t *pdata,
+    const uint32_t *ptarget, uint32_t max_nonce,
+    unsigned long  *hashes_done)
+{
+	// benchmark mode: overwrite the top target word with a fixed value
+	if (opt_benchmark)
+		((uint32_t*)ptarget)[7] = 0x0000ff;
+
+	const int throughput = 2560*512*1;
+	// Scale the batch size once and keep it integral: adding the float product
+	// straight to pdata[29] rounded every nonce above 2^24 through float.
+	const int threads = (int)(throughput * tp_coef[thr_id]);
+	const uint32_t FirstNonce = pdata[29];
+
+	// one-time setup per thr_id
+	static bool init[8] = {0,0,0,0,0,0,0,0};
+	if (!init[thr_id])
+	{
+		cudaSetDevice(device_map[thr_id]);
+		cudaMalloc(&d_prod0[thr_id], 35 * sizeof(uint64_t) * (size_t)threads);
+		cudaMalloc(&d_prod1[thr_id], 38 * sizeof(uint64_t) * (size_t)threads);
+		cudaMalloc(&KeccakH[thr_id],  8 * sizeof(uint64_t) * (size_t)threads);
+		cudaMalloc(&Sha512H[thr_id],  8 * sizeof(uint64_t) * (size_t)threads);
+
+		m7_sha256_cpu_init(thr_id, threads);
+		sha512_cpu_init(thr_id, threads);
+		m7_keccak512_cpu_init(thr_id, threads);
+		haval256_cpu_init(thr_id, threads);
+		tiger192_cpu_init(thr_id, threads);
+		whirlpool512_cpu_init(thr_id, threads, 0);
+		ripemd160_cpu_init(thr_id, threads);
+		quark_check_cpu_init(thr_id, threads);
+		m7_bigmul_init(thr_id, threads);
+		mul_init();
+		init[thr_id] = true;
+	}
+
+	whirlpool512_setBlock_120((void*)pdata);
+	m7_sha256_setBlock_120((void*)pdata, ptarget);
+	sha512_setBlock_120((void*)pdata);
+	haval256_setBlock_120((void*)pdata);
+	m7_keccak512_setBlock_120((void*)pdata);
+	ripemd160_setBlock_120((void*)pdata);
+	tiger192_setBlock_120((void*)pdata);
+	quark_check_cpu_setTarget(ptarget);
+
+	do {
+		int order = 0;
+		// keccak512 and sha512 stages
+		m7_keccak512_cpu_hash(thr_id, threads, pdata[29], KeccakH[thr_id], order++);
+		m7_sha512_cpu_hash_120(thr_id, threads, pdata[29], Sha512H[thr_id], order++);
+		// NOTE(review): every cpu_mulT4 call passes 0, not thr_id, as its first
+		// argument (kept from the original) - confirm this is intended.
+		cpu_mulT4(0, threads, 8, 8, Sha512H[thr_id], KeccakH[thr_id], d_prod0[thr_id], order); //64
+		MyStreamSynchronize(0, order++, thr_id);
+
+		// whirlpool512 stage; KeccakH is reused as the output buffer from here on
+		m7_whirlpool512_cpu_hash_120(thr_id, threads, pdata[29], KeccakH[thr_id], order++);
+		cpu_mulT4(0, threads, 8, 16, KeccakH[thr_id], d_prod0[thr_id], d_prod1[thr_id], order); //128
+		MyStreamSynchronize(0, order++, thr_id);
+
+		// sha256 stage
+		m7_sha256_cpu_hash_120(thr_id, threads, pdata[29], KeccakH[thr_id], order++);
+		cpu_mulT4(0, threads, 4, 24, KeccakH[thr_id], d_prod1[thr_id], d_prod0[thr_id], order); //96
+		MyStreamSynchronize(0, order++, thr_id);
+
+		// haval256 stage
+		m7_haval256_cpu_hash_120(thr_id, threads, pdata[29], KeccakH[thr_id], order++);
+		cpu_mulT4(0, threads, 4, 28, KeccakH[thr_id], d_prod0[thr_id], d_prod1[thr_id], order); //112
+		MyStreamSynchronize(0, order++, thr_id);
+
+		// tiger192 stage
+		m7_tiger192_cpu_hash_120(thr_id, threads, pdata[29], KeccakH[thr_id], order++);
+		m7_bigmul_unroll1_cpu(thr_id, threads, KeccakH[thr_id], d_prod1[thr_id], d_prod0[thr_id], order);
+		MyStreamSynchronize(0, order++, thr_id);
+
+		// ripemd160 stage
+		m7_ripemd160_cpu_hash_120(thr_id, threads, pdata[29], KeccakH[thr_id], order++);
+		m7_bigmul_unroll2_cpu(thr_id, threads, KeccakH[thr_id], d_prod0[thr_id], d_prod1[thr_id], order);
+		MyStreamSynchronize(0, order++, thr_id);
+
+		// last stage; a result of 0xffffffff is treated as "no nonce found"
+		uint32_t foundNonce = m7_sha256_cpu_hash_300(thr_id, threads, pdata[29], NULL, d_prod1[thr_id], order);
+		if (foundNonce != 0xffffffff) {
+			pdata[29] = foundNonce;
+			*hashes_done = foundNonce - FirstNonce + 1;
+			return 1;
+		}
+		*hashes_done += threads;
+
+		// Advance in 64-bit arithmetic so the nonce cannot wrap past 0xffffffff
+		// and silently restart the scan from a low value.
+		const uint64_t next_nonce = (uint64_t) pdata[29] + threads;
+		if (next_nonce > 0xffffffffULL) {
+			pdata[29] = max_nonce;
+			break;
+		}
+		pdata[29] = (uint32_t) next_nonce;
+	} while (pdata[29] < max_nonce && !work_restart[thr_id].restart);
+	return 0;
+}
diff --git a/x13/m7_keccak512.cu b/x13/m7_keccak512.cu
new file mode 100644
index 0000000000..8a295d4fdc
--- /dev/null
+++ b/x13/m7_keccak512.cu
@@ -0,0 +1,387 @@
+
+#include <cuda.h>
+#include "cuda_runtime.h"
+#include "device_launch_parameters.h"
+
+
+#include <stdio.h>
+#include <stdint.h>
+#include <memory.h>
+
+
+extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
+extern int compute_version[8];
+
+#include "cuda_helper.h"
+// Keccak state obtained after absorbing the first 9 lanes (72 bytes) of the input
+// and permuting once; computed on the host in m7_keccak512_setBlock_120 and read by
+// every GPU thread as its starting state.
+static __constant__ uint64_t stateo[25];
+// Device-side copy of the 24 round constants (uploaded from cpu_RC in m7_keccak512_cpu_init).
+static __constant__ uint64_t RC[24];
+// Host-side table of the 24 Keccak round constants; entry i is XORed into lane 0 in round i.
+static const uint64_t cpu_RC[24] = {
+    0x0000000000000001ull, 0x0000000000008082ull,
+    0x800000000000808aull, 0x8000000080008000ull,
+    0x000000000000808bull, 0x0000000080000001ull,
+    0x8000000080008081ull, 0x8000000000008009ull,
+    0x000000000000008aull, 0x0000000000000088ull,
+    0x0000000080008009ull, 0x000000008000000aull,
+    0x000000008000808bull, 0x800000000000008bull,
+    0x8000000000008089ull, 0x8000000000008003ull,
+    0x8000000000008002ull, 0x8000000000000080ull,
+    0x000000000000800aull, 0x800000008000000aull,
+    0x8000000080008081ull, 0x8000000000008080ull,
+    0x0000000080000001ull, 0x8000000080008008ull
+};
+
+/*
+ * Device-side Keccak permutation: runs 24 rounds (theta, rho/pi, chi, iota)
+ * in place on the 25-lane state.
+ *
+ *   s                      : 25 x 64-bit lanes, overwritten with the permuted state.
+ *   keccak_round_constants : 24 constants; entry i is XORed into s[0] in round i.
+ *
+ * Nothing is absorbed here: the caller must already have XORed the message
+ * block into `s`.
+ */
+static __device__ __forceinline__ void keccak_block(uint64_t *s, const uint64_t *keccak_round_constants) {
+    size_t i;
+    uint64_t t[5], u[5], v, w;
+
+    /* the message block is expected to be XORed into s by the caller */
+    
+//#pragma unroll 24
+    for (i = 0; i < 24; i++) {
+        /* theta: c = a[0,i] ^ a[1,i] ^ .. a[4,i] */
+		
+        t[0] = s[0] ^ s[5] ^ s[10] ^ s[15] ^ s[20];
+        t[1] = s[1] ^ s[6] ^ s[11] ^ s[16] ^ s[21];
+        t[2] = s[2] ^ s[7] ^ s[12] ^ s[17] ^ s[22];
+        t[3] = s[3] ^ s[8] ^ s[13] ^ s[18] ^ s[23];
+        t[4] = s[4] ^ s[9] ^ s[14] ^ s[19] ^ s[24]; 
+		 
+        /* theta: d[i] = c[i+4] ^ rotl(c[i+1],1) */
+        /* xor1 comes from cuda_helper.h; presumably a plain 64-bit XOR (the host
+           variant of this function uses ^ at the same place) - TODO confirm */
+		
+		uint64_t temp0,temp1,temp2,temp3,temp4;
+        temp0 = ROTL64(t[0], 1);
+		temp1 = ROTL64(t[1], 1);
+		temp2 = ROTL64(t[2], 1);
+		temp3 = ROTL64(t[3], 1);
+		temp4 = ROTL64(t[4], 1);
+		u[0] = xor1(t[4],temp1);
+        u[1] = xor1(t[0],temp2);
+        u[2] = xor1(t[1],temp3);
+        u[3] = xor1(t[2],temp4);
+        u[4] = xor1(t[3],temp0);
+		
+        /* theta: a[0,i], a[1,i], .. a[4,i] ^= d[i] */
+        s[0] ^= u[0]; s[5] ^= u[0]; s[10] ^= u[0]; s[15] ^= u[0]; s[20] ^= u[0];
+        s[1] ^= u[1]; s[6] ^= u[1]; s[11] ^= u[1]; s[16] ^= u[1]; s[21] ^= u[1];
+        s[2] ^= u[2]; s[7] ^= u[2]; s[12] ^= u[2]; s[17] ^= u[2]; s[22] ^= u[2];
+        s[3] ^= u[3]; s[8] ^= u[3]; s[13] ^= u[3]; s[18] ^= u[3]; s[23] ^= u[3];
+        s[4] ^= u[4]; s[9] ^= u[4]; s[14] ^= u[4]; s[19] ^= u[4]; s[24] ^= u[4];
+
+        /* rho pi: b[..] = rotl(a[..], ..) */
+        /* done in place as one chain of moves: s[1] is saved in v first and
+           written back (rotated) into s[10] at the end, so the order matters */
+        v = s[ 1];
+        s[ 1] = ROTL64(s[ 6], 44);
+        s[ 6] = ROTL64(s[ 9], 20);
+        s[ 9] = ROTL64(s[22], 61);
+        s[22] = ROTL64(s[14], 39);
+        s[14] = ROTL64(s[20], 18);
+        s[20] = ROTL64(s[ 2], 62);
+        s[ 2] = ROTL64(s[12], 43);
+        s[12] = ROTL64(s[13], 25);
+        s[13] = ROTL64(s[19],  8);
+        s[19] = ROTL64(s[23], 56);
+        s[23] = ROTL64(s[15], 41);
+        s[15] = ROTL64(s[ 4], 27);
+        s[ 4] = ROTL64(s[24], 14);
+        s[24] = ROTL64(s[21],  2);
+        s[21] = ROTL64(s[ 8], 55);
+        s[ 8] = ROTL64(s[16], 45);
+        s[16] = ROTL64(s[ 5], 36);
+        s[ 5] = ROTL64(s[ 3], 28);
+        s[ 3] = ROTL64(s[18], 21);
+        s[18] = ROTL64(s[17], 15);
+        s[17] = ROTL64(s[11], 10);
+        s[11] = ROTL64(s[ 7],  6);
+        s[ 7] = ROTL64(s[10],  3);
+        s[10] = ROTL64(    v,  1);
+
+        /* chi: a[i,j] ^= ~b[i,j+1] & b[i,j+2] */		
+        /* per row of 5 lanes: v and w keep the original first two lanes, which
+           are still needed after they have been overwritten */
+
+		v = s[ 0]; w = s[ 1]; 
+		s[ 0] ^= (~w) & s[ 2]; 
+		s[ 1] ^= (~s[ 2]) & s[ 3]; 
+		s[ 2] ^= (~s[ 3]) & s[ 4]; 
+		s[ 3] ^= (~s[ 4]) & v; 
+		s[ 4] ^= (~v) & w;
+		v = s[ 5]; w = s[ 6];
+		s[ 5] ^= (~w) & s[ 7];
+		s[ 6] ^= (~s[ 7]) & s[ 8];
+		s[ 7] ^= (~s[ 8]) & s[ 9];
+		s[ 8] ^= (~s[ 9]) & v;
+		s[ 9] ^= (~v) & w;
+        v = s[10]; w = s[11];
+		s[10] ^= (~w) & s[12];
+		s[11] ^= (~s[12]) & s[13];
+		s[12] ^= (~s[13]) & s[14];
+		s[13] ^= (~s[14]) & v;
+		s[14] ^= (~v) & w;
+        v = s[15]; w = s[16];
+		s[15] ^= (~w) & s[17];
+		s[16] ^= (~s[17]) & s[18];
+		s[17] ^= (~s[18]) & s[19];
+		s[18] ^= (~s[19]) & v;
+		s[19] ^= (~v) & w;
+        v = s[20]; w = s[21];
+		s[20] ^= (~w) & s[22];
+		s[21] ^= (~s[22]) & s[23];
+		s[22] ^= (~s[23]) & s[24];
+		s[23] ^= (~s[24]) & v;
+        s[24] ^= (~v) & w;
+		
+        /* iota: a[0,0] ^= round constant */
+        s[0] ^= keccak_round_constants[i];
+    }
+}
+
+/*
+ * Variant of the device Keccak permutation that keeps each lane as a uint2
+ * instead of a uint64_t; selected by the host launcher when
+ * compute_version[thr_id] >= 35.
+ *
+ *   s                      : 25 lanes as uint2, permuted in place (24 rounds).
+ *   keccak_round_constants : 24 64-bit constants, converted with vectorize()
+ *                            before being XORed into s[0].
+ *
+ * ROL2, vectorize and the uint2 ^ / & / ~ operators are not defined in this
+ * file; presumably they come from cuda_helper.h, with ROL2 a 64-bit
+ * rotate-left on the uint2 pair - TODO confirm.
+ */
+static __device__ __forceinline__ void keccak_blockv35(uint2 *s, const uint64_t *keccak_round_constants) {
+	size_t i;
+	uint2 t[5], u[5], v, w;
+
+
+	for (i = 0; i < 24; i++) {
+		/* theta: c = a[0,i] ^ a[1,i] ^ .. a[4,i] */
+		t[0] = s[0] ^ s[5] ^ s[10] ^ s[15] ^ s[20];
+		t[1] = s[1] ^ s[6] ^ s[11] ^ s[16] ^ s[21];
+		t[2] = s[2] ^ s[7] ^ s[12] ^ s[17] ^ s[22];
+		t[3] = s[3] ^ s[8] ^ s[13] ^ s[18] ^ s[23];
+		t[4] = s[4] ^ s[9] ^ s[14] ^ s[19] ^ s[24];
+
+		/* theta: d[i] = c[i+4] ^ rotl(c[i+1],1) */
+		u[0] = t[4] ^ ROL2(t[1], 1);
+		u[1] = t[0] ^ ROL2(t[2], 1);
+		u[2] = t[1] ^ ROL2(t[3], 1);
+		u[3] = t[2] ^ ROL2(t[4], 1);
+		u[4] = t[3] ^ ROL2(t[0], 1);
+
+		/* theta: a[0,i], a[1,i], .. a[4,i] ^= d[i] */
+		s[0] ^= u[0]; s[5] ^= u[0]; s[10] ^= u[0]; s[15] ^= u[0]; s[20] ^= u[0];
+		s[1] ^= u[1]; s[6] ^= u[1]; s[11] ^= u[1]; s[16] ^= u[1]; s[21] ^= u[1];
+		s[2] ^= u[2]; s[7] ^= u[2]; s[12] ^= u[2]; s[17] ^= u[2]; s[22] ^= u[2];
+		s[3] ^= u[3]; s[8] ^= u[3]; s[13] ^= u[3]; s[18] ^= u[3]; s[23] ^= u[3];
+		s[4] ^= u[4]; s[9] ^= u[4]; s[14] ^= u[4]; s[19] ^= u[4]; s[24] ^= u[4];
+
+		/* rho pi: b[..] = rotl(a[..], ..) */
+		/* in-place chain of moves: s[1] is parked in v and lands in s[10] last */
+		v = s[1];
+		s[1] = ROL2(s[6], 44);
+		s[6] = ROL2(s[9], 20);
+		s[9] = ROL2(s[22], 61);
+		s[22] = ROL2(s[14], 39);
+		s[14] = ROL2(s[20], 18);
+		s[20] = ROL2(s[2], 62);
+		s[2] = ROL2(s[12], 43);
+		s[12] = ROL2(s[13], 25);
+		s[13] = ROL2(s[19], 8);
+		s[19] = ROL2(s[23], 56);
+		s[23] = ROL2(s[15], 41);
+		s[15] = ROL2(s[4], 27);
+		s[4] = ROL2(s[24], 14);
+		s[24] = ROL2(s[21], 2);
+		s[21] = ROL2(s[8], 55);
+		s[8] = ROL2(s[16], 45);
+		s[16] = ROL2(s[5], 36);
+		s[5] = ROL2(s[3], 28);
+		s[3] = ROL2(s[18], 21);
+		s[18] = ROL2(s[17], 15);
+		s[17] = ROL2(s[11], 10);
+		s[11] = ROL2(s[7], 6);
+		s[7] = ROL2(s[10], 3);
+		s[10] = ROL2(v, 1);
+
+		/* chi: a[i,j] ^= ~b[i,j+1] & b[i,j+2] */
+		/* one line per row of 5 lanes; v and w hold the row's original first two lanes */
+		v = s[0]; w = s[1]; s[0] ^= (~w) & s[2]; s[1] ^= (~s[2]) & s[3]; s[2] ^= (~s[3]) & s[4]; s[3] ^= (~s[4]) & v; s[4] ^= (~v) & w;
+		v = s[5]; w = s[6]; s[5] ^= (~w) & s[7]; s[6] ^= (~s[7]) & s[8]; s[7] ^= (~s[8]) & s[9]; s[8] ^= (~s[9]) & v; s[9] ^= (~v) & w;
+		v = s[10]; w = s[11]; s[10] ^= (~w) & s[12]; s[11] ^= (~s[12]) & s[13]; s[12] ^= (~s[13]) & s[14]; s[13] ^= (~s[14]) & v; s[14] ^= (~v) & w;
+		v = s[15]; w = s[16]; s[15] ^= (~w) & s[17]; s[16] ^= (~s[17]) & s[18]; s[17] ^= (~s[18]) & s[19]; s[18] ^= (~s[19]) & v; s[19] ^= (~v) & w;
+		v = s[20]; w = s[21]; s[20] ^= (~w) & s[22]; s[21] ^= (~s[22]) & s[23]; s[22] ^= (~s[23]) & s[24]; s[23] ^= (~s[24]) & v; s[24] ^= (~v) & w;
+
+		/* iota: a[0,0] ^= round constant */
+		s[0] ^= vectorize(keccak_round_constants[i]);
+	}
+}
+
+
+/*
+ * Host-side Keccak permutation: the same 24 rounds (theta, rho/pi, chi, iota)
+ * as the device version, applied in place to the 25-lane state. Called from
+ * m7_keccak512_setBlock_120 to precompute the state uploaded to `stateo`.
+ *
+ *   s                      : 25 x 64-bit lanes, overwritten with the permuted state.
+ *   keccak_round_constants : 24 constants; entry i is XORed into s[0] in round i.
+ */
+static __forceinline__ void keccak_block_host(uint64_t *s, const uint64_t *keccak_round_constants) {
+    size_t i;
+    uint64_t t[5], u[5], v, w;
+
+    /* the message block is expected to be XORed into s by the caller */
+    
+    for (i = 0; i < 24; i++) {
+        /* theta: c = a[0,i] ^ a[1,i] ^ .. a[4,i] */
+        t[0] = s[0] ^ s[5] ^ s[10] ^ s[15] ^ s[20];
+        t[1] = s[1] ^ s[6] ^ s[11] ^ s[16] ^ s[21];
+        t[2] = s[2] ^ s[7] ^ s[12] ^ s[17] ^ s[22];
+        t[3] = s[3] ^ s[8] ^ s[13] ^ s[18] ^ s[23];
+        t[4] = s[4] ^ s[9] ^ s[14] ^ s[19] ^ s[24];
+
+        /* theta: d[i] = c[i+4] ^ rotl(c[i+1],1) */
+        u[0] = t[4] ^ ROTL64(t[1], 1);
+        u[1] = t[0] ^ ROTL64(t[2], 1);
+        u[2] = t[1] ^ ROTL64(t[3], 1);
+        u[3] = t[2] ^ ROTL64(t[4], 1);
+        u[4] = t[3] ^ ROTL64(t[0], 1);
+
+        /* theta: a[0,i], a[1,i], .. a[4,i] ^= d[i] */
+        s[0] ^= u[0]; s[5] ^= u[0]; s[10] ^= u[0]; s[15] ^= u[0]; s[20] ^= u[0];
+        s[1] ^= u[1]; s[6] ^= u[1]; s[11] ^= u[1]; s[16] ^= u[1]; s[21] ^= u[1];
+        s[2] ^= u[2]; s[7] ^= u[2]; s[12] ^= u[2]; s[17] ^= u[2]; s[22] ^= u[2];
+        s[3] ^= u[3]; s[8] ^= u[3]; s[13] ^= u[3]; s[18] ^= u[3]; s[23] ^= u[3];
+        s[4] ^= u[4]; s[9] ^= u[4]; s[14] ^= u[4]; s[19] ^= u[4]; s[24] ^= u[4];
+
+        /* rho pi: b[..] = rotl(a[..], ..) */
+        /* in-place chain of moves: s[1] is parked in v and lands in s[10] last */
+        v = s[ 1];
+        s[ 1] = ROTL64(s[ 6], 44);
+        s[ 6] = ROTL64(s[ 9], 20);
+        s[ 9] = ROTL64(s[22], 61);
+        s[22] = ROTL64(s[14], 39);
+        s[14] = ROTL64(s[20], 18);
+        s[20] = ROTL64(s[ 2], 62);
+        s[ 2] = ROTL64(s[12], 43);
+        s[12] = ROTL64(s[13], 25);
+        s[13] = ROTL64(s[19],  8);
+        s[19] = ROTL64(s[23], 56);
+        s[23] = ROTL64(s[15], 41);
+        s[15] = ROTL64(s[ 4], 27);
+        s[ 4] = ROTL64(s[24], 14);
+        s[24] = ROTL64(s[21],  2);
+        s[21] = ROTL64(s[ 8], 55);
+        s[ 8] = ROTL64(s[16], 45);
+        s[16] = ROTL64(s[ 5], 36);
+        s[ 5] = ROTL64(s[ 3], 28);
+        s[ 3] = ROTL64(s[18], 21);
+        s[18] = ROTL64(s[17], 15);
+        s[17] = ROTL64(s[11], 10);
+        s[11] = ROTL64(s[ 7],  6);
+        s[ 7] = ROTL64(s[10],  3);
+        s[10] = ROTL64(    v,  1);
+
+        /* chi: a[i,j] ^= ~b[i,j+1] & b[i,j+2] */
+        /* one line per row of 5 lanes; v and w hold the row's original first two lanes */
+        v = s[ 0]; w = s[ 1]; s[ 0] ^= (~w) & s[ 2]; s[ 1] ^= (~s[ 2]) & s[ 3]; s[ 2] ^= (~s[ 3]) & s[ 4]; s[ 3] ^= (~s[ 4]) & v; s[ 4] ^= (~v) & w;
+        v = s[ 5]; w = s[ 6]; s[ 5] ^= (~w) & s[ 7]; s[ 6] ^= (~s[ 7]) & s[ 8]; s[ 7] ^= (~s[ 8]) & s[ 9]; s[ 8] ^= (~s[ 9]) & v; s[ 9] ^= (~v) & w;
+        v = s[10]; w = s[11]; s[10] ^= (~w) & s[12]; s[11] ^= (~s[12]) & s[13]; s[12] ^= (~s[13]) & s[14]; s[13] ^= (~s[14]) & v; s[14] ^= (~v) & w;
+        v = s[15]; w = s[16]; s[15] ^= (~w) & s[17]; s[16] ^= (~s[17]) & s[18]; s[17] ^= (~s[18]) & s[19]; s[18] ^= (~s[19]) & v; s[19] ^= (~v) & w;
+        v = s[20]; w = s[21]; s[20] ^= (~w) & s[22]; s[21] ^= (~s[22]) & s[23]; s[22] ^= (~s[23]) & s[24]; s[23] ^= (~s[24]) & v; s[24] ^= (~v) & w;
+
+        /* iota: a[0,0] ^= round constant */
+        s[0] ^= keccak_round_constants[i];
+    }
+}
+
+
+
+ __constant__ uint64_t c_PaddedMessage80[16]; // 128-byte message buffer filled by m7_keccak512_setBlock_120: 122 input bytes, one 0x01 byte, then zeros (not 80 bytes, despite the name)
+
+
+
+// Keccak kernel for the second block of the 122-byte input, one nonce per thread.
+// Each thread starts from the precomputed state `stateo`, XORs in message lanes
+// 9..15 of c_PaddedMessage80 (its own nonce replaces the high word of lane 14),
+// XORs the top bit into lane 8, runs the permutation and stores lanes 0..7.
+//   threads     : number of nonces to process; threads beyond it do nothing.
+//   startNounce : nonce assigned to thread 0; thread t uses startNounce + t.
+//   outputHash  : lane i of thread t is written to outputHash[i*threads + t].
+__global__ void  m7_keccak512_gpu_hash_120(int threads, uint32_t startNounce, uint64_t *outputHash)
+{
+	const int tid = (blockDim.x * blockIdx.x + threadIdx.x);
+	if (tid >= threads)
+		return;
+
+	const uint32_t nonce = startNounce + tid;
+	uint64_t lanes[25];
+
+	// lanes 0..4: precomputed state XOR message lanes 9..13
+	#pragma unroll 5
+	for (int k = 0; k < 5; k++) { lanes[k] = xor1(stateo[k], c_PaddedMessage80[9 + k]); }
+
+	// lane 5 carries the nonce in its high 32 bits
+	lanes[5] = xor1(stateo[5], REPLACE_HIWORD(c_PaddedMessage80[14], nonce));
+	lanes[6] = xor1(stateo[6], c_PaddedMessage80[15]);
+	lanes[7] = stateo[7];
+	lanes[8] = xor1(stateo[8], 0x8000000000000000);
+
+	// remaining lanes are taken unchanged from the precomputed state
+	#pragma unroll 16
+	for (int k = 9; k < 25; k++) { lanes[k] = stateo[k]; }
+
+	keccak_block(lanes, RC);
+
+	#pragma unroll 8
+	for (int k = 0; k < 8; k++) { outputHash[k*threads + tid] = lanes[k]; }
+}
+
+// uint2-lane variant of the second-block Keccak kernel (launched by the host
+// wrapper when compute_version[thr_id] >= 35), one nonce per thread.
+// Each thread loads the precomputed state `stateo`, XORs in message lanes 9..15 of
+// c_PaddedMessage80 (lane 14 is rebuilt from its low 32-bit word plus the
+// thread's nonce), XORs 0x80000000 into the second word of lane 8, permutes,
+// and stores lanes 0..7.
+//   threads     : number of nonces to process; threads beyond it do nothing.
+//   startNounce : nonce assigned to thread 0; thread t uses startNounce + t.
+//   outputHash  : lane i of thread t is written to outputHash[i*threads + t].
+__global__ void  __launch_bounds__(256, 3) m7_keccak512_gpu_hash_120_v35(int threads, uint32_t startNounce, uint64_t *outputHash)
+{
+	const int tid = (blockDim.x * blockIdx.x + threadIdx.x);
+	if (tid >= threads)
+		return;
+
+	const uint32_t nonce = startNounce + tid;
+	uint2 lanes[25];
+
+	// start from the precomputed state
+	#pragma unroll 25
+	for (int k = 0; k < 25; k++) { lanes[k] = vectorize(stateo[k]); }
+
+	// message lanes 9..13 go into state lanes 0..4
+	#pragma unroll 5
+	for (int k = 0; k < 5; k++) { lanes[k] ^= vectorize(c_PaddedMessage80[9 + k]); }
+
+	// lane 5: low word from the message, nonce as the second word
+	lanes[5] ^= make_uint2(((uint32_t*)c_PaddedMessage80)[28], nonce);
+	lanes[6] ^= vectorize(c_PaddedMessage80[15]);
+
+	lanes[8] ^= make_uint2(0, 0x80000000);
+
+	keccak_blockv35(lanes, RC);
+
+	#pragma unroll 8
+	for (int k = 0; k < 8; k++) { outputHash[k*threads + tid] = devectorize(lanes[k]); }
+}
+
+
+// Uploads the 24 Keccak round constants (cpu_RC) into the device constant RC.
+//   thr_id  : miner thread index, used only in the error message.
+//   threads : unused.
+// A failed upload is reported on stderr instead of being silently ignored;
+// without the constants every hash computed by the kernels would be wrong.
+void m7_keccak512_cpu_init(int thr_id, int threads)
+{
+	(void)threads;
+
+	cudaError_t err = cudaMemcpyToSymbol(RC, cpu_RC, sizeof(cpu_RC), 0, cudaMemcpyHostToDevice);
+	if (err != cudaSuccess) {
+		fprintf(stderr, "GPU #%d: m7_keccak512_cpu_init: uploading round constants failed: %s\n",
+			thr_id, cudaGetErrorString(err));
+	}
+}
+
+// Prepares the per-work constants for the Keccak kernels.
+//   pdata : input message; at least 122 bytes are read.
+//
+// 1. Builds a 128-byte buffer (122 message bytes, a 0x01 byte, zero fill) and
+//    uploads it to c_PaddedMessage80.
+// 2. Absorbs the first 9 lanes (72 bytes) of the message into an all-zero state,
+//    permutes it once on the host and uploads the result to `stateo`, so the
+//    kernels only have to process the remaining block.
+// Upload failures are reported on stderr instead of being silently ignored.
+__host__ void m7_keccak512_setBlock_120(void *pdata)
+{
+	cudaError_t err;
+
+	unsigned char PaddedMessage[128];
+	memcpy(PaddedMessage, pdata, 122);
+	PaddedMessage[122] = 0x01;
+	memset(PaddedMessage + 123, 0, 5);
+	err = cudaMemcpyToSymbol(c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice);
+	if (err != cudaSuccess) {
+		fprintf(stderr, "m7_keccak512_setBlock_120: uploading padded message failed: %s\n", cudaGetErrorString(err));
+	}
+
+	// XORing the first block into a zero state equals copying it; memcpy also
+	// avoids reading pdata through a uint64_t pointer it may not be aligned for.
+	uint64_t state[25];
+	memset(state, 0, sizeof(state));
+	memcpy(state, pdata, 9*sizeof(uint64_t));
+	keccak_block_host(state, cpu_RC);
+
+	err = cudaMemcpyToSymbol(stateo, state, 25*sizeof(uint64_t), 0, cudaMemcpyHostToDevice);
+	if (err != cudaSuccess) {
+		fprintf(stderr, "m7_keccak512_setBlock_120: uploading midstate failed: %s\n", cudaGetErrorString(err));
+	}
+}
+
+
+// Launches the second-block Keccak kernel for `threads` nonces starting at
+// startNounce and then synchronizes via MyStreamSynchronize.
+//   thr_id      : miner thread index; selects the kernel variant through
+//                 compute_version[thr_id] (< 35: 64-bit lanes, otherwise uint2 lanes).
+//   threads     : number of nonces to hash.
+//   startNounce : nonce handled by thread 0.
+//   d_hash      : device output buffer; the kernels write 8 lanes per nonce.
+//   order       : passed through to MyStreamSynchronize.
+__host__ void m7_keccak512_cpu_hash(int thr_id, int threads, uint32_t startNounce, uint64_t *d_hash, int order)
+{
+    const int threadsperblock = 256;
+
+    // Round the block count up: with a truncating division the last
+    // (threads % threadsperblock) nonces would never be hashed. The kernels
+    // bounds-check against `threads`, so the surplus threads are harmless.
+    dim3 grid((threads + threadsperblock - 1) / threadsperblock);
+    dim3 block(threadsperblock);
+
+    size_t shared_size = 0;
+	if (compute_version[thr_id]<35) {
+    m7_keccak512_gpu_hash_120<<<grid, block, shared_size>>>(threads, startNounce, d_hash);
+	}
+	else {
+	m7_keccak512_gpu_hash_120_v35 << <grid, block, shared_size >> >(threads, startNounce, d_hash);
+	}
+
+    MyStreamSynchronize(NULL, order, thr_id);
+}
+
diff --git a/x13/whirlpool.cu b/x13/whirlpool.cu
new file mode 100644
index 0000000000..4a9a6521ff
--- /dev/null
+++ b/x13/whirlpool.cu
@@ -0,0 +1,128 @@
+/*
+ * whirlpool routine for new algorithm
+ * 
+ */
+
+extern "C"
+{
+#include "sph/sph_whirlpool.h"
+
+#include "miner.h"
+}
+
+// from cpu-miner.c
+extern int device_map[8];
+
+// Memory for the input/output of the chained hash functions
+static uint32_t *d_hash[8];
+
+extern void whirlpool512_cpu_init(int thr_id, int threads, int flag);
+extern void whirlpool512_setBlock_80(void *pdata, const void *ptarget);
+extern void whirlpool512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
+extern void whirlpool512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+extern uint32_t whirlpool512_cpu_finalhash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void quark_check_cpu_init(int thr_id, int threads);
+extern void quark_check_cpu_setTarget(const void *ptarget);
+
+extern uint32_t quark_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
+
+// CPU reference hash used to validate GPU results: four chained
+// sph_whirlpool1 passes. The first pass hashes the 80-byte input, each of the
+// next three re-hashes the previous 64-byte digest. The first 32 bytes of the
+// final digest are copied to `state`.
+inline void wh_hash(void *state, const void *input)
+{
+	sph_whirlpool_context ctx;
+	uint32_t digest[16];
+
+	// pass 1: 80-byte input
+	sph_whirlpool1_init(&ctx);
+	sph_whirlpool1(&ctx, input, 80);
+	sph_whirlpool1_close(&ctx, (void*) digest);
+
+	// passes 2..4: re-hash the 64-byte digest in place
+	for (int pass = 0; pass < 3; pass++) {
+		sph_whirlpool1_init(&ctx);
+		sph_whirlpool1(&ctx, (const void*) digest, 64);
+		sph_whirlpool1_close(&ctx, (void*) digest);
+	}
+
+	memcpy(state, digest, 32);
+}
+
+
+extern bool opt_benchmark;
+
+// Scans nonces for the chained-whirlpool algorithm on the GPU.
+//   thr_id      : miner thread index (selects the device via device_map and the
+//                 per-thread d_hash buffer).
+//   pdata       : 20-word block header; pdata[19] is the nonce and is advanced here.
+//   ptarget     : target; word 7 is used for the quick check (overwritten with
+//                 0x0000ff in benchmark mode).
+//   max_nonce   : scanning stops once pdata[19] reaches this value.
+//   hashes_done : receives the number of nonces covered by this call.
+// Returns 1 with pdata[19] set to the winning nonce when a GPU candidate also
+// validates on the CPU, otherwise 0.
+extern "C" int scanhash_wh(int thr_id, uint32_t *pdata,
+    const uint32_t *ptarget, uint32_t max_nonce,
+    unsigned long *hashes_done)
+{
+	const uint32_t first_nonce = pdata[19];
+
+	if (opt_benchmark)
+		((uint32_t*)ptarget)[7] = 0x0000ff;
+
+	const uint32_t Htarg = ptarget[7];
+
+	const int throughput = 256*256*8*4;
+	
+	static bool init[8] = {0,0,0,0,0,0,0,0};
+	if (!init[thr_id])
+	{
+		cudaSetDevice(device_map[thr_id]);
+
+		// copy constants, allocate memory (one-time setup per thread)
+		cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
+		whirlpool512_cpu_init(thr_id, throughput,1);
+		
+		init[thr_id] = true;
+	}
+
+	// byte-swapped copy of the header for the GPU setup and the CPU re-check
+	uint32_t endiandata[20];
+	for (int k=0; k < 20; k++) {
+		be32enc(&endiandata[k], ((uint32_t*)pdata)[k]);	}
+	whirlpool512_setBlock_80((void*)endiandata, ptarget);
+
+	do {
+		int order = 0;
+		 
+		whirlpool512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
+		whirlpool512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		whirlpool512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		uint32_t foundNonce = whirlpool512_cpu_finalhash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		if  (foundNonce != 0xffffffff)
+		{
+			// re-compute the candidate on the CPU before reporting it
+			uint32_t vhash64[8];
+			be32enc(&endiandata[19], foundNonce);
+
+			wh_hash(vhash64, endiandata);
+
+			if( (vhash64[7]<=Htarg) && fulltest(vhash64, ptarget) ) {
+                
+                pdata[19] = foundNonce;
+                *hashes_done = foundNonce - first_nonce + 1;
+                return 1;
+			} else {
+				applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU! vhash64 %08x and htarg %08x", thr_id, foundNonce,vhash64[7],Htarg);
+			}
+		}
+
+		// Advancing by `throughput` must not wrap the 32-bit nonce: after a wrap
+		// pdata[19] would be below max_nonce again and the loop would rescan
+		// the low nonce range instead of terminating.
+		if ((uint64_t)pdata[19] + throughput > 0xffffffffULL) {
+			pdata[19] = max_nonce;
+			break;
+		}
+		pdata[19] += throughput;
+
+	} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
+
+	*hashes_done = pdata[19] - first_nonce + 1;
+	return 0;
+}
diff --git a/x13/x14.cu b/x13/x14.cu
new file mode 100644
index 0000000000..4dd7cdba5c
--- /dev/null
+++ b/x13/x14.cu
@@ -0,0 +1,311 @@
+/*
+ * X14 algorithm built on cbuchner1's original X11
+ * 
+ */
+
+extern "C"
+{
+#include "sph/sph_blake.h"
+#include "sph/sph_bmw.h"
+#include "sph/sph_groestl.h"
+#include "sph/sph_skein.h"
+#include "sph/sph_jh.h"
+#include "sph/sph_keccak.h"
+
+#include "sph/sph_luffa.h"
+#include "sph/sph_cubehash.h"
+#include "sph/sph_shavite.h"
+#include "sph/sph_simd.h"
+#include "sph/sph_echo.h"
+
+#include "sph/sph_hamsi.h"
+#include "sph/sph_fugue.h"
+
+#include "sph/sph_shabal.h"
+
+
+#include "miner.h"
+}
+
+// from cpu-miner.c
+extern int device_map[8];
+
+// Memory for the input/output of the chained hash functions
+static uint32_t *d_hash[8];
+
+extern void quark_blake512_cpu_init(int thr_id, int threads);
+extern void quark_blake512_cpu_setBlock_80(void *pdata);
+extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
+extern void quark_blake512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void quark_bmw512_cpu_init(int thr_id, int threads);
+extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void quark_groestl512_cpu_init(int thr_id, int threads);
+extern void quark_groestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+//extern void quark_doublegroestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void quark_skein512_cpu_init(int thr_id, int threads);
+extern void quark_skein512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void quark_keccak512_cpu_init(int thr_id, int threads);
+extern void quark_keccak512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void quark_jh512_cpu_init(int thr_id, int threads);
+extern void quark_jh512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x11_luffa512_cpu_init(int thr_id, int threads);
+extern void x11_luffa512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x11_cubehash512_cpu_init(int thr_id, int threads);
+extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x11_shavite512_cpu_init(int thr_id, int threads);
+extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x11_simd512_cpu_init(int thr_id, int threads);
+extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x11_echo512_cpu_init(int thr_id, int threads);
+extern void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x13_hamsi512_cpu_init(int thr_id, int threads);
+extern void x13_hamsi512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x13_fugue512_cpu_init(int thr_id, int threads);
+extern void x13_fugue512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x13_shabal512_cpu_init(int thr_id, int threads);
+extern void x13_shabal512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+/*
+extern void x13_whirlpool512_cpu_init(int thr_id, int threads);
+extern void x13_whirlpool512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+*/
+extern void quark_check_cpu_init(int thr_id, int threads);
+extern void quark_check_cpu_setTarget(const void *ptarget);
+extern uint32_t quark_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
+
+extern void quark_compactTest_cpu_init(int thr_id, int threads);
+extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, 
+											uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse,
+											int order);
+
+// X14 CPU reference hash, used to validate GPU results.
+// Chain: blake - bmw - groestl - skein - jh - keccak - luffa - cubehash -
+//        shavite - simd - echo - hamsi - fugue - shabal (all 512-bit variants).
+// The first stage hashes the 80-byte input; every later stage hashes the
+// previous 64-byte digest in place. The first 32 bytes of the final digest are
+// copied to `state`.
+inline void x14hash(void *state, const void *input)
+{
+	uint32_t hash[16];
+
+	{	// 1: blake512 over the 80-byte input
+		sph_blake512_context ctx;
+		sph_blake512_init(&ctx);
+		sph_blake512(&ctx, input, 80);
+		sph_blake512_close(&ctx, (void*) hash);
+	}
+	{	// 2: bmw512
+		sph_bmw512_context ctx;
+		sph_bmw512_init(&ctx);
+		sph_bmw512(&ctx, (const void*) hash, 64);
+		sph_bmw512_close(&ctx, (void*) hash);
+	}
+	{	// 3: groestl512
+		sph_groestl512_context ctx;
+		sph_groestl512_init(&ctx);
+		sph_groestl512(&ctx, (const void*) hash, 64);
+		sph_groestl512_close(&ctx, (void*) hash);
+	}
+	{	// 4: skein512
+		sph_skein512_context ctx;
+		sph_skein512_init(&ctx);
+		sph_skein512(&ctx, (const void*) hash, 64);
+		sph_skein512_close(&ctx, (void*) hash);
+	}
+	{	// 5: jh512
+		sph_jh512_context ctx;
+		sph_jh512_init(&ctx);
+		sph_jh512(&ctx, (const void*) hash, 64);
+		sph_jh512_close(&ctx, (void*) hash);
+	}
+	{	// 6: keccak512
+		sph_keccak512_context ctx;
+		sph_keccak512_init(&ctx);
+		sph_keccak512(&ctx, (const void*) hash, 64);
+		sph_keccak512_close(&ctx, (void*) hash);
+	}
+	{	// 7: luffa512
+		sph_luffa512_context ctx;
+		sph_luffa512_init(&ctx);
+		sph_luffa512(&ctx, (const void*) hash, 64);
+		sph_luffa512_close(&ctx, (void*) hash);
+	}
+	{	// 8: cubehash512
+		sph_cubehash512_context ctx;
+		sph_cubehash512_init(&ctx);
+		sph_cubehash512(&ctx, (const void*) hash, 64);
+		sph_cubehash512_close(&ctx, (void*) hash);
+	}
+	{	// 9: shavite512
+		sph_shavite512_context ctx;
+		sph_shavite512_init(&ctx);
+		sph_shavite512(&ctx, (const void*) hash, 64);
+		sph_shavite512_close(&ctx, (void*) hash);
+	}
+	{	// 10: simd512
+		sph_simd512_context ctx;
+		sph_simd512_init(&ctx);
+		sph_simd512(&ctx, (const void*) hash, 64);
+		sph_simd512_close(&ctx, (void*) hash);
+	}
+	{	// 11: echo512
+		sph_echo512_context ctx;
+		sph_echo512_init(&ctx);
+		sph_echo512(&ctx, (const void*) hash, 64);
+		sph_echo512_close(&ctx, (void*) hash);
+	}
+	{	// 12: hamsi512
+		sph_hamsi512_context ctx;
+		sph_hamsi512_init(&ctx);
+		sph_hamsi512(&ctx, (const void*) hash, 64);
+		sph_hamsi512_close(&ctx, (void*) hash);
+	}
+	{	// 13: fugue512
+		sph_fugue512_context ctx;
+		sph_fugue512_init(&ctx);
+		sph_fugue512(&ctx, (const void*) hash, 64);
+		sph_fugue512_close(&ctx, (void*) hash);
+	}
+	{	// 14: shabal512
+		sph_shabal512_context ctx;
+		sph_shabal512_init(&ctx);
+		sph_shabal512(&ctx, (const void*) hash, 64);
+		sph_shabal512_close(&ctx, (void*) hash);
+	}
+
+	memcpy(state, hash, 32);
+}
+
+
+extern bool opt_benchmark;
+
+// Scans nonces for the X14 algorithm on the GPU.
+//   thr_id      : miner thread index (selects the device via device_map and the
+//                 per-thread d_hash buffer).
+//   pdata       : 20-word block header; pdata[19] is the nonce and is advanced here.
+//   ptarget     : target; word 7 is used for the quick check (overwritten with
+//                 0x0000ff in benchmark mode).
+//   max_nonce   : scanning stops once pdata[19] reaches this value.
+//   hashes_done : receives the number of nonces covered by this call.
+// Returns 1 with pdata[19] set to the winning nonce when a GPU candidate also
+// validates on the CPU, otherwise 0.
+extern "C" int scanhash_x14(int thr_id, uint32_t *pdata,
+    const uint32_t *ptarget, uint32_t max_nonce,
+    unsigned long *hashes_done)
+{
+	const uint32_t first_nonce = pdata[19];
+
+	if (opt_benchmark)
+		((uint32_t*)ptarget)[7] = 0x0000ff;
+
+	const uint32_t Htarg = ptarget[7];
+
+	const int throughput = 256*256*8;
+
+	static bool init[8] = {0,0,0,0,0,0,0,0};
+	if (!init[thr_id])
+	{
+		cudaSetDevice(device_map[thr_id]);
+
+		// copy constants, allocate memory (one-time setup per thread)
+		cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
+		quark_blake512_cpu_init(thr_id, throughput);
+		quark_groestl512_cpu_init(thr_id, throughput);
+		quark_skein512_cpu_init(thr_id, throughput);
+		quark_bmw512_cpu_init(thr_id, throughput);
+		quark_keccak512_cpu_init(thr_id, throughput);
+		quark_jh512_cpu_init(thr_id, throughput);
+		x11_luffa512_cpu_init(thr_id, throughput);
+		x11_cubehash512_cpu_init(thr_id, throughput);
+		x11_shavite512_cpu_init(thr_id, throughput);
+		x11_simd512_cpu_init(thr_id, throughput);
+		x11_echo512_cpu_init(thr_id, throughput);
+		x13_hamsi512_cpu_init(thr_id, throughput);
+		x13_fugue512_cpu_init(thr_id, throughput);
+		x13_shabal512_cpu_init(thr_id, throughput);
+
+		quark_check_cpu_init(thr_id, throughput);
+
+		init[thr_id] = true;
+	}
+
+	// byte-swapped copy of the header for the GPU setup and the CPU re-check
+	uint32_t endiandata[20];
+	for (int k=0; k < 20; k++)
+		be32enc(&endiandata[k], ((uint32_t*)pdata)[k]);
+
+	quark_blake512_cpu_setBlock_80((void*)endiandata);
+	quark_check_cpu_setTarget(ptarget);
+
+	do {
+		int order = 0;
+
+		// first stage: Blake512 over the 80-byte header
+		quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
+
+		// remaining 13 stages, each applied unconditionally to the 64-byte hashes in d_hash
+		quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x11_luffa512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x13_hamsi512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x13_fugue512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x13_shabal512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+
+		// scan for winning hashes on the GPU
+		uint32_t foundNonce = quark_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		if  (foundNonce != 0xffffffff)
+		{
+			// re-compute the candidate on the CPU before reporting it
+			uint32_t vhash64[8];
+			be32enc(&endiandata[19], foundNonce);
+			x14hash(vhash64, endiandata);
+
+			if( (vhash64[7]<=Htarg) && fulltest(vhash64, ptarget) ) {
+                
+                pdata[19] = foundNonce;
+                *hashes_done = foundNonce - first_nonce + 1;
+                return 1;
+			} else {
+				applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNonce);
+			}
+		}
+
+		// Advancing by `throughput` must not wrap the 32-bit nonce: after a wrap
+		// pdata[19] would be below max_nonce again and the loop would rescan
+		// the low nonce range instead of terminating.
+		if ((uint64_t)pdata[19] + throughput > 0xffffffffULL) {
+			pdata[19] = max_nonce;
+			break;
+		}
+		pdata[19] += throughput;
+
+	} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
+
+	*hashes_done = pdata[19] - first_nonce + 1;
+	return 0;
+}
diff --git a/x13/x15.cu b/x13/x15.cu
new file mode 100644
index 0000000000..9e51ebcc83
--- /dev/null
+++ b/x13/x15.cu
@@ -0,0 +1,313 @@
+/*
+ * X15 algorithm built on cbuchner1's original X11
+ * 
+ */
+
+extern "C"
+{
+#include "sph/sph_blake.h"
+#include "sph/sph_bmw.h"
+#include "sph/sph_groestl.h"
+#include "sph/sph_skein.h"
+#include "sph/sph_jh.h"
+#include "sph/sph_keccak.h"
+
+#include "sph/sph_luffa.h"
+#include "sph/sph_cubehash.h"
+#include "sph/sph_shavite.h"
+#include "sph/sph_simd.h"
+#include "sph/sph_echo.h"
+
+#include "sph/sph_hamsi.h"
+#include "sph/sph_fugue.h"
+
+#include "sph/sph_shabal.h"
+#include "sph/sph_whirlpool.h"
+
+#include "miner.h"
+}
+
+// from cpu-miner.c
+extern int device_map[8];
+
+// Memory for input/output of the chained hash functions
+static uint32_t *d_hash[8];
+
+extern void quark_blake512_cpu_init(int thr_id, int threads);
+extern void quark_blake512_cpu_setBlock_80(void *pdata);
+extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
+extern void quark_blake512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void quark_bmw512_cpu_init(int thr_id, int threads);
+extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void quark_groestl512_cpu_init(int thr_id, int threads);
+extern void quark_groestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+//extern void quark_doublegroestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void quark_skein512_cpu_init(int thr_id, int threads);
+extern void quark_skein512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void quark_keccak512_cpu_init(int thr_id, int threads);
+extern void quark_keccak512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void quark_jh512_cpu_init(int thr_id, int threads);
+extern void quark_jh512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x11_luffa512_cpu_init(int thr_id, int threads);
+extern void x11_luffa512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x11_cubehash512_cpu_init(int thr_id, int threads);
+extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x11_shavite512_cpu_init(int thr_id, int threads);
+extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x11_simd512_cpu_init(int thr_id, int threads);
+extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x11_echo512_cpu_init(int thr_id, int threads);
+extern void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x13_hamsi512_cpu_init(int thr_id, int threads);
+extern void x13_hamsi512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x13_fugue512_cpu_init(int thr_id, int threads);
+extern void x13_fugue512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x13_shabal512_cpu_init(int thr_id, int threads);
+extern void x13_shabal512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void whirlpool512_cpu_init(int thr_id, int threads,int flag);
+extern void whirlpool512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void quark_check_cpu_init(int thr_id, int threads);
+extern void quark_check_cpu_setTarget(const void *ptarget);
+extern uint32_t quark_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
+
+extern void quark_compactTest_cpu_init(int thr_id, int threads);
+extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, 
+											uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse,
+											int order);
+
+// X15 hash function, computed on the CPU
+inline void x15hash(void *state, const void *input)
+{
+	// CPU implementation of the X15 chain, built from the sph_* hash functions.
+	// scanhash_x15() calls it to re-check nonces reported by the GPU.
+	//
+	// Stage order:
+	//   blake512 -> bmw512 -> groestl512 -> skein512 -> jh512 -> keccak512 ->
+	//   luffa512 -> cubehash512 -> shavite512 -> simd512 -> echo512 ->
+	//   hamsi512 -> fugue512 -> shabal512 -> whirlpool
+	//
+	// input : the first stage reads 80 bytes from it.
+	// state : receives the first 32 bytes of the last stage's output.
+
+	// Shared buffer: each stage writes its result here and the next stage reads
+	// 64 bytes back out of it.
+	uint32_t digest[16];
+
+	sph_blake512_context blake_ctx;
+	sph_blake512_init(&blake_ctx);
+	sph_blake512(&blake_ctx, input, 80);
+	sph_blake512_close(&blake_ctx, digest);
+
+	sph_bmw512_context bmw_ctx;
+	sph_bmw512_init(&bmw_ctx);
+	sph_bmw512(&bmw_ctx, digest, 64);
+	sph_bmw512_close(&bmw_ctx, digest);
+
+	sph_groestl512_context groestl_ctx;
+	sph_groestl512_init(&groestl_ctx);
+	sph_groestl512(&groestl_ctx, digest, 64);
+	sph_groestl512_close(&groestl_ctx, digest);
+
+	sph_skein512_context skein_ctx;
+	sph_skein512_init(&skein_ctx);
+	sph_skein512(&skein_ctx, digest, 64);
+	sph_skein512_close(&skein_ctx, digest);
+
+	sph_jh512_context jh_ctx;
+	sph_jh512_init(&jh_ctx);
+	sph_jh512(&jh_ctx, digest, 64);
+	sph_jh512_close(&jh_ctx, digest);
+
+	sph_keccak512_context keccak_ctx;
+	sph_keccak512_init(&keccak_ctx);
+	sph_keccak512(&keccak_ctx, digest, 64);
+	sph_keccak512_close(&keccak_ctx, digest);
+
+	sph_luffa512_context luffa_ctx;
+	sph_luffa512_init(&luffa_ctx);
+	sph_luffa512(&luffa_ctx, digest, 64);
+	sph_luffa512_close(&luffa_ctx, digest);
+
+	sph_cubehash512_context cubehash_ctx;
+	sph_cubehash512_init(&cubehash_ctx);
+	sph_cubehash512(&cubehash_ctx, digest, 64);
+	sph_cubehash512_close(&cubehash_ctx, digest);
+
+	sph_shavite512_context shavite_ctx;
+	sph_shavite512_init(&shavite_ctx);
+	sph_shavite512(&shavite_ctx, digest, 64);
+	sph_shavite512_close(&shavite_ctx, digest);
+
+	sph_simd512_context simd_ctx;
+	sph_simd512_init(&simd_ctx);
+	sph_simd512(&simd_ctx, digest, 64);
+	sph_simd512_close(&simd_ctx, digest);
+
+	sph_echo512_context echo_ctx;
+	sph_echo512_init(&echo_ctx);
+	sph_echo512(&echo_ctx, digest, 64);
+	sph_echo512_close(&echo_ctx, digest);
+
+	sph_hamsi512_context hamsi_ctx;
+	sph_hamsi512_init(&hamsi_ctx);
+	sph_hamsi512(&hamsi_ctx, digest, 64);
+	sph_hamsi512_close(&hamsi_ctx, digest);
+
+	sph_fugue512_context fugue_ctx;
+	sph_fugue512_init(&fugue_ctx);
+	sph_fugue512(&fugue_ctx, digest, 64);
+	sph_fugue512_close(&fugue_ctx, digest);
+
+	sph_shabal512_context shabal_ctx;
+	sph_shabal512_init(&shabal_ctx);
+	sph_shabal512(&shabal_ctx, digest, 64);
+	sph_shabal512_close(&shabal_ctx, digest);
+
+	sph_whirlpool_context whirlpool_ctx;
+	sph_whirlpool_init(&whirlpool_ctx);
+	sph_whirlpool(&whirlpool_ctx, digest, 64);
+	sph_whirlpool_close(&whirlpool_ctx, digest);
+
+	// Only the first 32 bytes (8 words) of the final result are handed back;
+	// scanhash_x15() passes a uint32_t[8] as `state`.
+	memcpy(state, digest, 32);
+}
+
+
+extern bool opt_benchmark;
+
+/*
+ * Scans for an X15 share on the GPU, in batches of `throughput` nonces
+ * starting at pdata[19].
+ *
+ * thr_id      : index into device_map[], d_hash[], init[] and work_restart[].
+ * pdata       : 20 words of work data; pdata[19] is the nonce, updated in place.
+ * ptarget     : target given to the GPU check and to fulltest(); word 7 is
+ *               overwritten with 0x0000ff when opt_benchmark is set.
+ * max_nonce   : the scan stops once pdata[19] is no longer below this value.
+ * hashes_done : receives the nonce count computed for this call (0 if the
+ *               hash buffer could not be allocated).
+ * Returns 1, with pdata[19] set to the nonce, when a GPU candidate also
+ * validates on the CPU; returns 0 otherwise (range exhausted, work restart,
+ * or allocation failure).
+ */
+extern "C" int scanhash_x15(int thr_id, uint32_t *pdata,
+    const uint32_t *ptarget, uint32_t max_nonce,
+    unsigned long *hashes_done)
+{
+	const uint32_t first_nonce = pdata[19];
+
+	if (opt_benchmark)
+		((uint32_t*)ptarget)[7] = 0x0000ff;
+
+	const uint32_t Htarg = ptarget[7];
+
+	const int throughput = 256*256*8;
+
+	static bool init[8] = {0,0,0,0,0,0,0,0};
+	if (!init[thr_id])
+	{
+		cudaSetDevice(device_map[thr_id]);
+
+		// 16 words of hash storage per nonce of a batch. Give up on failure rather
+		// than hand an invalid pointer to the kernels; init[] stays false.
+		cudaError_t err = cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
+		if (err != cudaSuccess)
+		{
+			applog(LOG_ERR, "GPU #%d: cudaMalloc failed: %s", thr_id, cudaGetErrorString(err));
+			*hashes_done = 0;
+			return 0;
+		}
+
+		quark_blake512_cpu_init(thr_id, throughput);
+		quark_groestl512_cpu_init(thr_id, throughput);
+		quark_skein512_cpu_init(thr_id, throughput);
+		quark_bmw512_cpu_init(thr_id, throughput);
+		quark_keccak512_cpu_init(thr_id, throughput);
+		quark_jh512_cpu_init(thr_id, throughput);
+		x11_luffa512_cpu_init(thr_id, throughput);
+		x11_cubehash512_cpu_init(thr_id, throughput);
+		x11_shavite512_cpu_init(thr_id, throughput);
+		x11_simd512_cpu_init(thr_id, throughput);
+		x11_echo512_cpu_init(thr_id, throughput);
+		x13_hamsi512_cpu_init(thr_id, throughput);
+		x13_fugue512_cpu_init(thr_id, throughput);
+		x13_shabal512_cpu_init(thr_id, throughput);
+		whirlpool512_cpu_init(thr_id, throughput,0);
+		quark_check_cpu_init(thr_id, throughput);
+
+		init[thr_id] = true;
+	}
+
+	// Big-endian encoded copy of the 20 work words, used by the GPU and the CPU re-check
+	uint32_t endiandata[20];
+	for (int k=0; k < 20; k++)
+		be32enc(&endiandata[k], pdata[k]);
+
+	quark_blake512_cpu_setBlock_80((void*)endiandata);
+	quark_check_cpu_setTarget(ptarget);
+
+	do {
+		int order = 0;
+
+		// The first stage hashes the 80-byte block set above; each later stage is
+		// handed the same d_hash buffer, in the order used by x15hash().
+		quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
+		quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x11_luffa512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x13_hamsi512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x13_fugue512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x13_shabal512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		whirlpool512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+
+		// Scan the batch on the GPU for a winning hash (0xffffffff = none found)
+		uint32_t foundNonce = quark_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		if  (foundNonce != 0xffffffff)
+		{
+			// Recompute the candidate on the CPU before reporting it
+			uint32_t vhash64[8];
+			be32enc(&endiandata[19], foundNonce);
+			x15hash(vhash64, endiandata);
+
+			if( (vhash64[7]<=Htarg) && fulltest(vhash64, ptarget) ) {
+				pdata[19] = foundNonce;
+				*hashes_done = foundNonce - first_nonce + 1;
+				return 1;
+			} else {
+				applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNonce);
+			}
+		}
+
+		// Saturating advance: a plain 32-bit add can wrap past 0xffffffff back to a
+		// low nonce, which would keep the loop below running over the range again.
+		const uint64_t next_nonce = (uint64_t)pdata[19] + throughput;
+		pdata[19] = (next_nonce > 0xffffffffULL) ? 0xffffffffU : (uint32_t)next_nonce;
+
+	} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
+
+	*hashes_done = pdata[19] - first_nonce + 1;
+	return 0;
+}
diff --git a/x13/x17.cu b/x13/x17.cu
new file mode 100644
index 0000000000..aeb2a41bfb
--- /dev/null
+++ b/x13/x17.cu
@@ -0,0 +1,343 @@
+/*
+ * X17 algorithm built on cbuchner1's original X11
+ * 
+ */
+
+extern "C"
+{
+#include "sph/sph_blake.h"
+#include "sph/sph_bmw.h"
+#include "sph/sph_groestl.h"
+#include "sph/sph_skein.h"
+#include "sph/sph_jh.h"
+#include "sph/sph_keccak.h"
+
+#include "sph/sph_luffa.h"
+#include "sph/sph_cubehash.h"
+#include "sph/sph_shavite.h"
+#include "sph/sph_simd.h"
+#include "sph/sph_echo.h"
+
+#include "sph/sph_hamsi.h"
+#include "sph/sph_fugue.h"
+
+#include "sph/sph_shabal.h"
+#include "sph/sph_whirlpool.h"
+#include "sph/sph_sha2.h"
+#include "sph/sph_haval.h"
+
+
+#include "miner.h"
+}
+
+// from cpu-miner.c
+extern int device_map[8];
+
+// Memory for input/output of the chained hash functions
+static uint32_t *d_hash[8];
+
+extern void quark_blake512_cpu_init(int thr_id, int threads);
+extern void quark_blake512_cpu_setBlock_80(void *pdata);
+extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
+extern void quark_blake512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void quark_bmw512_cpu_init(int thr_id, int threads);
+extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void quark_groestl512_cpu_init(int thr_id, int threads);
+extern void quark_groestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+//extern void quark_doublegroestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void quark_skein512_cpu_init(int thr_id, int threads);
+extern void quark_skein512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void quark_keccak512_cpu_init(int thr_id, int threads);
+extern void quark_keccak512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void quark_jh512_cpu_init(int thr_id, int threads);
+extern void quark_jh512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x11_luffa512_cpu_init(int thr_id, int threads);
+extern void x11_luffa512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x11_cubehash512_cpu_init(int thr_id, int threads);
+extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x11_shavite512_cpu_init(int thr_id, int threads);
+extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x11_simd512_cpu_init(int thr_id, int threads);
+extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x11_echo512_cpu_init(int thr_id, int threads);
+extern void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x13_hamsi512_cpu_init(int thr_id, int threads);
+extern void x13_hamsi512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x13_fugue512_cpu_init(int thr_id, int threads);
+extern void x13_fugue512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void x13_shabal512_cpu_init(int thr_id, int threads);
+extern void x13_shabal512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void whirlpool512_cpu_init(int thr_id, int threads, int flag);
+extern void whirlpool512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void sha512_cpu_init(int thr_id, int threads);
+extern void sha512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+extern void haval256_cpu_init(int thr_id, int threads);
+extern void haval256_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+
+
+
+extern void quark_check_cpu_init(int thr_id, int threads);
+extern void quark_check_cpu_setTarget(const void *ptarget);
+extern uint32_t quark_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
+
+extern void quark_compactTest_cpu_init(int thr_id, int threads);
+extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, 
+											uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse,
+											int order);
+
+// X17 hash function, computed on the CPU
+inline void x17hash(void *state, const void *input)
+{
+	// CPU implementation of the X17 chain, built from the sph_* hash functions.
+	// scanhash_x17() calls it to re-check nonces reported by the GPU.
+	//
+	// Stage order:
+	//   blake512 -> bmw512 -> groestl512 -> skein512 -> jh512 -> keccak512 ->
+	//   luffa512 -> cubehash512 -> shavite512 -> simd512 -> echo512 ->
+	//   hamsi512 -> fugue512 -> shabal512 -> whirlpool ->
+	//   sha512 -> haval256_5
+	//
+	// input : the first stage reads 80 bytes from it.
+	// state : receives the first 32 bytes of the last stage's output.
+
+	// Shared buffer: each stage writes its result here and the next stage reads
+	// 64 bytes back out of it.
+	uint32_t digest[16];
+
+	sph_blake512_context blake_ctx;
+	sph_blake512_init(&blake_ctx);
+	sph_blake512(&blake_ctx, input, 80);
+	sph_blake512_close(&blake_ctx, digest);
+
+	sph_bmw512_context bmw_ctx;
+	sph_bmw512_init(&bmw_ctx);
+	sph_bmw512(&bmw_ctx, digest, 64);
+	sph_bmw512_close(&bmw_ctx, digest);
+
+	sph_groestl512_context groestl_ctx;
+	sph_groestl512_init(&groestl_ctx);
+	sph_groestl512(&groestl_ctx, digest, 64);
+	sph_groestl512_close(&groestl_ctx, digest);
+
+	sph_skein512_context skein_ctx;
+	sph_skein512_init(&skein_ctx);
+	sph_skein512(&skein_ctx, digest, 64);
+	sph_skein512_close(&skein_ctx, digest);
+
+	sph_jh512_context jh_ctx;
+	sph_jh512_init(&jh_ctx);
+	sph_jh512(&jh_ctx, digest, 64);
+	sph_jh512_close(&jh_ctx, digest);
+
+	sph_keccak512_context keccak_ctx;
+	sph_keccak512_init(&keccak_ctx);
+	sph_keccak512(&keccak_ctx, digest, 64);
+	sph_keccak512_close(&keccak_ctx, digest);
+
+	sph_luffa512_context luffa_ctx;
+	sph_luffa512_init(&luffa_ctx);
+	sph_luffa512(&luffa_ctx, digest, 64);
+	sph_luffa512_close(&luffa_ctx, digest);
+
+	sph_cubehash512_context cubehash_ctx;
+	sph_cubehash512_init(&cubehash_ctx);
+	sph_cubehash512(&cubehash_ctx, digest, 64);
+	sph_cubehash512_close(&cubehash_ctx, digest);
+
+	sph_shavite512_context shavite_ctx;
+	sph_shavite512_init(&shavite_ctx);
+	sph_shavite512(&shavite_ctx, digest, 64);
+	sph_shavite512_close(&shavite_ctx, digest);
+
+	sph_simd512_context simd_ctx;
+	sph_simd512_init(&simd_ctx);
+	sph_simd512(&simd_ctx, digest, 64);
+	sph_simd512_close(&simd_ctx, digest);
+
+	sph_echo512_context echo_ctx;
+	sph_echo512_init(&echo_ctx);
+	sph_echo512(&echo_ctx, digest, 64);
+	sph_echo512_close(&echo_ctx, digest);
+
+	sph_hamsi512_context hamsi_ctx;
+	sph_hamsi512_init(&hamsi_ctx);
+	sph_hamsi512(&hamsi_ctx, digest, 64);
+	sph_hamsi512_close(&hamsi_ctx, digest);
+
+	sph_fugue512_context fugue_ctx;
+	sph_fugue512_init(&fugue_ctx);
+	sph_fugue512(&fugue_ctx, digest, 64);
+	sph_fugue512_close(&fugue_ctx, digest);
+
+	sph_shabal512_context shabal_ctx;
+	sph_shabal512_init(&shabal_ctx);
+	sph_shabal512(&shabal_ctx, digest, 64);
+	sph_shabal512_close(&shabal_ctx, digest);
+
+	sph_whirlpool_context whirlpool_ctx;
+	sph_whirlpool_init(&whirlpool_ctx);
+	sph_whirlpool(&whirlpool_ctx, digest, 64);
+	sph_whirlpool_close(&whirlpool_ctx, digest);
+
+	sph_sha512_context sha512_ctx;
+	sph_sha512_init(&sha512_ctx);
+	sph_sha512(&sha512_ctx, digest, 64);
+	sph_sha512_close(&sha512_ctx, digest);
+
+	sph_haval256_5_context haval_ctx;
+	sph_haval256_5_init(&haval_ctx);
+	sph_haval256_5(&haval_ctx, digest, 64);
+	sph_haval256_5_close(&haval_ctx, digest);
+
+	// Only the first 32 bytes (8 words) of the final result are handed back;
+	// scanhash_x17() passes a uint32_t[8] as `state`.
+	memcpy(state, digest, 32);
+}
+
+
+extern bool opt_benchmark;
+
+/*
+ * Scans for an X17 share on the GPU, in batches of `throughput` nonces
+ * starting at pdata[19]. GPU state for a thr_id is set up on its first
+ * call.
+ *
+ * thr_id      : index into device_map[], d_hash[], init[] and work_restart[].
+ * pdata       : 20 words of work data; pdata[19] is the nonce, updated in place.
+ * ptarget     : target given to the GPU check and to fulltest(); word 7 is
+ *               overwritten with 0x0000ff when opt_benchmark is set.
+ * max_nonce   : the scan stops once pdata[19] is no longer below this value.
+ * hashes_done : receives the nonce count computed for this call (0 if the
+ *               hash buffer could not be allocated).
+ * Returns 1, with pdata[19] set to the nonce, when a GPU candidate also
+ * validates on the CPU; returns 0 otherwise (range exhausted, work restart,
+ * or allocation failure).
+ */
+extern "C" int scanhash_x17(int thr_id, uint32_t *pdata,
+    const uint32_t *ptarget, uint32_t max_nonce,
+    unsigned long *hashes_done)
+{
+	const uint32_t first_nonce = pdata[19];
+
+	if (opt_benchmark)
+		((uint32_t*)ptarget)[7] = 0x0000ff;
+
+	const uint32_t Htarg = ptarget[7];
+
+	const int throughput = 256*256*8;
+
+	static bool init[8] = {0,0,0,0,0,0,0,0};
+	if (!init[thr_id])
+	{
+		cudaSetDevice(device_map[thr_id]);
+
+		// 16 words of hash storage per nonce of a batch. Give up on failure rather
+		// than hand an invalid pointer to the kernels; init[] stays false.
+		cudaError_t err = cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
+		if (err != cudaSuccess)
+		{
+			applog(LOG_ERR, "GPU #%d: cudaMalloc failed: %s", thr_id, cudaGetErrorString(err));
+			*hashes_done = 0;
+			return 0;
+		}
+
+		quark_blake512_cpu_init(thr_id, throughput);
+		quark_groestl512_cpu_init(thr_id, throughput);
+		quark_skein512_cpu_init(thr_id, throughput);
+		quark_bmw512_cpu_init(thr_id, throughput);
+		quark_keccak512_cpu_init(thr_id, throughput);
+		quark_jh512_cpu_init(thr_id, throughput);
+		x11_luffa512_cpu_init(thr_id, throughput);
+		x11_cubehash512_cpu_init(thr_id, throughput);
+		x11_shavite512_cpu_init(thr_id, throughput);
+		x11_simd512_cpu_init(thr_id, throughput);
+		x11_echo512_cpu_init(thr_id, throughput);
+		x13_hamsi512_cpu_init(thr_id, throughput);
+		x13_fugue512_cpu_init(thr_id, throughput);
+		x13_shabal512_cpu_init(thr_id, throughput);
+		whirlpool512_cpu_init(thr_id, throughput,0);
+
+		sha512_cpu_init(thr_id, throughput);
+		haval256_cpu_init(thr_id, throughput);
+
+		quark_check_cpu_init(thr_id, throughput);
+
+		init[thr_id] = true;
+	}
+
+	// Big-endian encoded copy of the 20 work words, used by the GPU and the CPU re-check
+	uint32_t endiandata[20];
+	for (int k=0; k < 20; k++)
+		be32enc(&endiandata[k], pdata[k]);
+
+	quark_blake512_cpu_setBlock_80((void*)endiandata);
+	quark_check_cpu_setTarget(ptarget);
+
+	do {
+		int order = 0;
+
+		// The first stage hashes the 80-byte block set above; each later stage is
+		// handed the same d_hash buffer, in the order used by x17hash().
+		quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
+		quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x11_luffa512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x13_hamsi512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x13_fugue512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		x13_shabal512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		whirlpool512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		sha512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		haval256_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+
+		// Scan the batch on the GPU for a winning hash (0xffffffff = none found)
+		uint32_t foundNonce = quark_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
+		if  (foundNonce != 0xffffffff)
+		{
+			// Recompute the candidate on the CPU before reporting it
+			uint32_t vhash64[8];
+			be32enc(&endiandata[19], foundNonce);
+			x17hash(vhash64, endiandata);
+
+			if( (vhash64[7]<=Htarg) && fulltest(vhash64, ptarget) ) {
+				pdata[19] = foundNonce;
+				*hashes_done = foundNonce - first_nonce + 1;
+				return 1;
+			} else {
+				applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNonce);
+			}
+		}
+
+		// Saturating advance: a plain 32-bit add can wrap past 0xffffffff back to a
+		// low nonce, which would keep the loop below running over the range again.
+		// Clamping at 0xffffffff guarantees the `< max_nonce` test below fails.
+		const uint64_t next_nonce = (uint64_t)pdata[19] + throughput;
+		pdata[19] = (next_nonce > 0xffffffffULL) ? 0xffffffffU : (uint32_t)next_nonce;
+
+	} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
+
+	*hashes_done = pdata[19] - first_nonce + 1;
+	return 0;
+}